From 45596133c6f8c7122ff7497bc9f3f7a6059e197a Mon Sep 17 00:00:00 2001 From: kaidenjessani1 <157517498+kaidenjessani1@users.noreply.github.com> Date: Mon, 29 Jan 2024 19:44:03 -0500 Subject: [PATCH 01/20] test_sql_queries.py --- tests/test_sql_queries.py | 114 +++++++++++++++++++++++++++----------- 1 file changed, 82 insertions(+), 32 deletions(-) diff --git a/tests/test_sql_queries.py b/tests/test_sql_queries.py index 22b25d546..e80478c2b 100644 --- a/tests/test_sql_queries.py +++ b/tests/test_sql_queries.py @@ -1,55 +1,105 @@ import unittest -import psycopg2 # Replace with appropriate database connector based on your database +import pandas as pd +import sys +from pathlib import Path +print(sys.path) +sys.path.append(str((Path(__file__).parent).parent)) +print('\n') +print('\n') +print(sys.path) +from create_and_populate_database import DatabaseSetup class TestSQLQueries(unittest.TestCase): def setUp(self): # Establish a connection to your test database - self.conn = psycopg2.connect( - dbname='your_dbname', - user='your_username', - password='your_password', - host='your_host', - port='your_port' - ) - self.cur = self.conn.cursor() - + db_set = DatabaseSetup() + db_set.databaseConnection() + self.conn, self.cur = db_set.conn, db_set.cur + def tearDown(self): # Close the database connection self.cur.close() self.conn.close() - def test_task1(self): - # Task 1: Example SQL query in task1.sql - with open('/sql/task1.sql', 'r') as file: + def get_queries(self, path): + ''' + Method fetches queries to check + --- + Params: + path (str): Path to a file with queries + + Returns: + sep_queries (list): List of queries + ''' + with open(path, 'r') as file: sql_query = file.read() + + sep_queries = sql_query.split('\n\n') + + return sep_queries[:4] - self.cur.execute(sql_query) - result = self.cur.fetchall() + def get_result_for_all_problems_in_task(self, queries_to_check, task_n): + ''' + Method iterate through queries for all problems in a task and compares results with 
expected results + --- + Params: + queries_to_check (list): list of queries to check + task_n (int): task number + + Returns: + task_result (bool): Method returns True or False + True - if all results match expected results in the task; + False - otherwise + ''' + task_result = True + + add = 1 # Because iterator starts from 0 + if task_n==2: + add=5 # Problems start from 5 for task 2 + elif task_n==3: + add=9 # Problems start from 9 for task 3 + + for iter in range(len(queries_to_check)): + print(f"\n---------------------------------------------------------------------- \ + \nTASK {task_n}. PROBLEM {iter+add}") + result = pd.read_sql_query(queries_to_check[iter], self.conn) + path = 'results/problem'+str(iter+add) + expected_result = pd.read_csv(path) + print(f'RESULT: \n {result}') + print(f'\nEXPECTED RESULT: \n {expected_result}') + print(f'\nActual and expected are equal: {expected_result.equals(result)}') + task_result&=expected_result.equals(result) + + return task_result - # Define expected outcome for Task 1 and compare - expected_result = [ - # Define expected rows or values here based on the query output - ] + def test_task1(self): + # Task 1: Example SQL query in task1.sql + queries_task1 = self.get_queries(path = 'sql/task1.sql') + task1_result = self.get_result_for_all_problems_in_task(queries_to_check = queries_task1, task_n = 1) - self.assertEqual(result, expected_result, "Task 1: Query output doesn't match expected result.") + # Final check + self.assertEqual(True,task1_result, "Task 1: Query output doesn't match expected result.") def test_task2(self): # Task 2: Example SQL query in task2.sql - with open('/sql/task2.sql', 'r') as file: - sql_query = file.read() - - self.cur.execute(sql_query) - result = self.cur.fetchall() + queries_task2 = self.get_queries(path = 'sql/task2.sql') + task2_result = self.get_result_for_all_problems_in_task(queries_to_check = queries_task2, task_n = 2) - # Define expected outcome for Task 2 and compare -
expected_result = [ - # Define expected rows or values here based on the query output - ] + # Final check + self.assertEqual(True,task2_result, "Task 2: Query output doesn't match expected result.") - self.assertEqual(result, expected_result, "Task 2: Query output doesn't match expected result.") + def test_task3(self): + # Task 3: Example SQL query in task3.sql + queries_task3 = self.get_queries(path = 'sql/task3.sql') + task3_result = self.get_result_for_all_problems_in_task(queries_to_check = queries_task3, task_n = 3) - # Add more test methods for additional SQL tasks + # Final check + self.assertEqual(True,task3_result, "Task 3: Query output doesn't match expected result.") if __name__ == '__main__': - unittest.main() \ No newline at end of file + db_set = DatabaseSetup() + db_set.databaseConnection() + db_set.createTables() + db_set.populateData() + unittest.main() From 1c8e6c811a4a58f35aa08516dcf7e0c032e2d052 Mon Sep 17 00:00:00 2001 From: kaidenjessani1 <157517498+kaidenjessani1@users.noreply.github.com> Date: Mon, 29 Jan 2024 19:47:18 -0500 Subject: [PATCH 02/20] create_and_populate_database.py --- tests/create_and_populate_database.py | 62 +++++++++++++++++++++++++++ 1 file changed, 62 insertions(+) create mode 100644 tests/create_and_populate_database.py diff --git a/tests/create_and_populate_database.py b/tests/create_and_populate_database.py new file mode 100644 index 000000000..83fc29a11 --- /dev/null +++ b/tests/create_and_populate_database.py @@ -0,0 +1,62 @@ +import sqlite3 +import pandas as pd + +class DatabaseSetup(): + def databaseConnection(self): + ''' + Method establishes connection to database + --- + Returns: + (tuple): connection and cursor + ''' + + + self.conn=None + try: + self.conn = sqlite3.connect(database='my_database') + print(f'successful SQLite connection with id {id(self.conn)}') + except: + print('Error occurred.') + + self.cur = self.conn.cursor() + return self.conn, self.cur + + def createTables(self): + ''' + Method builds
schema, creates tables + --- + ''' + with open('sql/schema.sql', 'r') as file: + sql_query = file.read() + + sep_queries = sql_query.split(';\n\n') + + for q in sep_queries: + result = self.conn.execute(q) + + def populateData(self): + ''' + Method populates data in database + ''' + categories_data_df = pd.read_csv('data/category_data.csv') + products_data_df = pd.read_csv('data/product_data.csv') + users_data_df = pd.read_csv('data/user_data.csv') + orders_data_df = pd.read_csv('data/order_data.csv') + order_Items_data_df = pd.read_csv('data/order_items_data.csv') + reviews_data_df = pd.read_csv('data/review_data.csv') + cart_data_df = pd.read_csv('data/cart_data.csv') + cart_Items_data_df = pd.read_csv('data/cart_item_data.csv') + payments_data_df = pd.read_csv('data/payment_data.csv') + shipping_data_df = pd.read_csv('data/shipping_data.csv') + + categories_data_df.to_sql('Categories', self.conn, if_exists='replace', index=False) + products_data_df.to_sql('Products', self.conn, if_exists='replace', index=False) + users_data_df.to_sql('Users', self.conn, if_exists='replace', index=False) + orders_data_df.to_sql('Orders', self.conn, if_exists='replace', index=False) + order_Items_data_df.to_sql('Order_Items', self.conn, if_exists='replace', index=False) + reviews_data_df.to_sql('Reviews', self.conn, if_exists='replace', index=False) + cart_data_df.to_sql('Cart', self.conn, if_exists='replace', index=False) + cart_Items_data_df.to_sql('Cart_Items', self.conn, if_exists='replace', index=False) + payments_data_df.to_sql('Payments', self.conn, if_exists='replace', index=False) + shipping_data_df.to_sql('Shipping', self.conn, if_exists='replace', index=False) + From 027e15af7a5f3ab58497a864209a40b7bfbdeba7 Mon Sep 17 00:00:00 2001 From: kaidenjessani1 <157517498+kaidenjessani1@users.noreply.github.com> Date: Mon, 29 Jan 2024 19:47:37 -0500 Subject: [PATCH 03/20] Update test_sql_queries.py --- tests/test_sql_queries.py | 4 ---- 1 file changed, 4 deletions(-) diff --git
a/tests/test_sql_queries.py b/tests/test_sql_queries.py index e80478c2b..6042301c4 100644 --- a/tests/test_sql_queries.py +++ b/tests/test_sql_queries.py @@ -2,11 +2,7 @@ import pandas as pd import sys from pathlib import Path -print(sys.path) sys.path.append(str((Path(__file__).parent).parent)) -print('\n') -print('\n') -print(sys.path) from create_and_populate_database import DatabaseSetup class TestSQLQueries(unittest.TestCase): From cb8f1fafd8374a9e5bbd144b018081dcd8cf2099 Mon Sep 17 00:00:00 2001 From: kaidenjessani1 <157517498+kaidenjessani1@users.noreply.github.com> Date: Mon, 29 Jan 2024 19:49:34 -0500 Subject: [PATCH 04/20] task1.sql --- sql/task1.sql | 33 +++++++++++++++++++++++++++++++++ 1 file changed, 33 insertions(+) diff --git a/sql/task1.sql b/sql/task1.sql index 90de336ca..fcfd6a52e 100644 --- a/sql/task1.sql +++ b/sql/task1.sql @@ -1,14 +1,47 @@ -- Problem 1: Retrieve all products in the Sports category -- Write an SQL query to retrieve all products in a specific category. +-- Problem 1 +SELECT p.* FROM Products p + LEFT JOIN Categories c + ON p.category_id=c.category_id + WHERE LOWER(c.category_name) like '%sports%' -- Problem 2: Retrieve the total number of orders for each user -- Write an SQL query to retrieve the total number of orders for each user. -- The result should include the user ID, username, and the total number of orders. +-- Problem 2 +SELECT u.user_id, u.username, count(o.order_id) FROM Users u + JOIN Orders o + ON u.user_id=o.user_id + GROUP BY u.user_id -- Problem 3: Retrieve the average rating for each product -- Write an SQL query to retrieve the average rating for each product. -- The result should include the product ID, product name, and the average rating. 
+-- Problem 3 +SELECT r.product_id, p.product_name, AVG(r.rating) FROM Products p + JOIN Reviews r + ON p.product_id=r.product_id + GROUP BY r.product_id -- Problem 4: Retrieve the top 5 users with the highest total amount spent on orders -- Write an SQL query to retrieve the top 5 users with the highest total amount spent on orders. -- The result should include the user ID, username, and the total amount spent. +-- Problem 4 +SELECT u.user_id, u.username, sum(o.total_amount) FROM Users u + JOIN Orders o + ON u.user_id=o.user_id + GROUP BY u.user_id, u.username + ORDER BY 3 DESC + LIMIT 5 + + + + + + + + + + + From 2821c8c01202eb38a0b1f7f0b9a371bc7a9af588 Mon Sep 17 00:00:00 2001 From: kaidenjessani1 <157517498+kaidenjessani1@users.noreply.github.com> Date: Mon, 29 Jan 2024 19:50:07 -0500 Subject: [PATCH 05/20] task2.sql --- sql/task2.sql | 43 ++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 42 insertions(+), 1 deletion(-) diff --git a/sql/task2.sql b/sql/task2.sql index ad2596731..11501e254 100644 --- a/sql/task2.sql +++ b/sql/task2.sql @@ -2,18 +2,59 @@ -- Write an SQL query to retrieve the products with the highest average rating. -- The result should include the product ID, product name, and the average rating. -- Hint: You may need to use subqueries or common table expressions (CTEs) to solve this problem. +with +avg_ratings as ( + SELECT p.product_id, p.product_name, AVG(rating) over (PARTITION BY p.product_id) avg_rating FROM Products p + JOIN Reviews r + ON p.product_id=r.product_id +) +SELECT * FROM avg_ratings + WHERE avg_rating = (SELECT MAX(avg_rating) FROM avg_ratings) -- Problem 6: Retrieve the users who have made at least one order in each category -- Write an SQL query to retrieve the users who have made at least one order in each category. -- The result should include the user ID and username. -- Hint: You may need to use subqueries or joins to solve this problem. 
+with +tmp AS ( + SELECT COUNT(DISTINCT c.category_id) dist_cat_n, u.user_id, u.username FROM Users u + JOIN Orders o + ON u.user_id=o.user_id + JOIN Order_Items oi + ON o.order_id=oi.order_id + JOIN Products p + ON p.product_id=oi.product_id + JOIN Categories c + ON p.category_id=c.category_id + GROUP BY 2,3 +) +SELECT user_id, username FROM tmp +WHERE dist_cat_n = (SELECT COUNT(DISTINCT category_id) FROM Categories) -- Problem 7: Retrieve the products that have not received any reviews -- Write an SQL query to retrieve the products that have not received any reviews. -- The result should include the product ID and product name. -- Hint: You may need to use subqueries or left joins to solve this problem. +SELECT product_id, product_name FROM Products +WHERE product_id not in (SELECT DISTINCT product_id FROM Reviews) -- Problem 8: Retrieve the users who have made consecutive orders on consecutive days -- Write an SQL query to retrieve the users who have made consecutive orders on consecutive days. -- The result should include the user ID and username. --- Hint: You may need to use subqueries or window functions to solve this problem. \ No newline at end of file +-- Hint: You may need to use subqueries or window functions to solve this problem. 
+with orders_with_prev AS ( + SELECT + o.user_id, + u.username, + o.order_date, + LAG(o.order_date,1) OVER (PARTITION BY o.user_id ORDER BY o.order_date) previous_order_date + FROM Orders o + JOIN Users u + ON o.user_id=u.user_id + ORDER BY 1,3 + ) + SELECT + user_id, + username + FROM orders_with_prev + WHERE Cast((JulianDay(order_date) - JulianDay(previous_order_date)) As Integer) = 1 From c4115df6d70c23dd178e0e081d5b2ac4e2d76ed4 Mon Sep 17 00:00:00 2001 From: kaidenjessani1 <157517498+kaidenjessani1@users.noreply.github.com> Date: Mon, 29 Jan 2024 19:50:29 -0500 Subject: [PATCH 06/20] task3.sql --- sql/task3.sql | 65 +++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 65 insertions(+) diff --git a/sql/task3.sql b/sql/task3.sql index f078a9439..dd4e81b67 100644 --- a/sql/task3.sql +++ b/sql/task3.sql @@ -2,18 +2,83 @@ -- Write an SQL query to retrieve the top 3 categories with the highest total sales amount. -- The result should include the category ID, category name, and the total sales amount. -- Hint: You may need to use subqueries, joins, and aggregate functions to solve this problem. +with tot_q AS ( + SELECT + oi.product_id, + SUM(oi.quantity) total_q_per_prod + FROM Order_Items oi + GROUP BY oi.product_id +) +SELECT c.category_name, c.category_id, SUM(t.total_q_per_prod) q_per_category FROM tot_q t + JOIN Products p + ON t.product_id=p.product_id + JOIN Categories c + ON p.category_id=c.category_id + GROUP BY c.category_id -- Problem 10: Retrieve the users who have placed orders for all products in the Toys & Games -- Write an SQL query to retrieve the users who have placed orders for all products in the Toys & Games -- The result should include the user ID and username. -- Hint: You may need to use subqueries, joins, and aggregate functions to solve this problem. 
+SELECT u.user_id, u.username FROM Users u + JOIN Orders o + ON u.user_id=o.user_id +WHERE o.order_id IN ( + SELECT oi.order_id FROM Order_Items oi + JOIN Products p + ON oi.product_id=p.product_id + JOIN Categories c + ON p.category_id=c.category_id + WHERE c.category_name LIKE 'Toys & Games' +) -- Problem 11: Retrieve the products that have the highest price within each category -- Write an SQL query to retrieve the products that have the highest price within each category. -- The result should include the product ID, product name, category ID, and price. -- Hint: You may need to use subqueries, joins, and window functions to solve this problem. +SELECT p.product_id, p.product_name, c.category_id, MAX(p.price) FROM Products p + JOIN Categories c + ON p.category_id=c.category_id + GROUP BY c.category_id -- Problem 12: Retrieve the users who have placed orders on consecutive days for at least 3 days -- Write an SQL query to retrieve the users who have placed orders on consecutive days for at least 3 days. -- The result should include the user ID and username. -- Hint: You may need to use subqueries, joins, and window functions to solve this problem. 
+with + users_with_date_diff AS ( + SELECT + o.user_id, + u.username, + o.order_date, + LAG(o.order_date,1) OVER (PARTITION BY o.user_id ORDER BY o.order_date) previous_order_date, + LEAD(o.order_date,1) OVER (PARTITION BY o.user_id ORDER BY o.order_date) next_order_date + FROM Orders o + JOIN Users u + ON o.user_id=u.user_id + ORDER BY 1,3 + ), + users_with_date_diff_nominal AS ( + SELECT + user_id, + username, + order_date, + previous_order_date, + next_order_date, + Cast((JulianDay(order_date) - JulianDay(previous_order_date)) As Integer) date_diff_before, + Cast((JulianDay(next_order_date) - JulianDay(order_date)) As Integer) date_diff_after + FROM users_with_date_diff + --WHERE Cast((JulianDay(order_date) - JulianDay(previous_order_date)) As Integer) = 1 + ) +SELECT + user_id, + username +FROM users_with_date_diff_nominal +WHERE date_diff_before&date_diff_after + + + + + + + From 05f09af28dae7416ff356f35f63d5509f4c2818b Mon Sep 17 00:00:00 2001 From: kaidenjessani1 <157517498+kaidenjessani1@users.noreply.github.com> Date: Mon, 29 Jan 2024 19:53:45 -0500 Subject: [PATCH 07/20] results --- results/pro | 1 + 1 file changed, 1 insertion(+) create mode 100644 results/pro diff --git a/results/pro b/results/pro new file mode 100644 index 000000000..8b1378917 --- /dev/null +++ b/results/pro @@ -0,0 +1 @@ + From 865d8bfc87e1f66ffd105c0ff1daa62e117e61d7 Mon Sep 17 00:00:00 2001 From: kaidenjessani1 <157517498+kaidenjessani1@users.noreply.github.com> Date: Mon, 29 Jan 2024 19:54:16 -0500 Subject: [PATCH 08/20] Add files via upload --- results/problem1 | 3 +++ results/problem10 | 2 ++ results/problem11 | 9 +++++++++ results/problem12 | 1 + results/problem2 | 31 +++++++++++++++++++++++++++++++ results/problem3 | 17 +++++++++++++++++ results/problem4 | 6 ++++++ results/problem5 | 6 ++++++ results/problem6 | 1 + results/problem7 | 1 + results/problem8 | 1 + results/problem9 | 9 +++++++++ 12 files changed, 87 insertions(+) create mode 100644 results/problem1 create mode 
100644 results/problem10 create mode 100644 results/problem11 create mode 100644 results/problem12 create mode 100644 results/problem2 create mode 100644 results/problem3 create mode 100644 results/problem4 create mode 100644 results/problem5 create mode 100644 results/problem6 create mode 100644 results/problem7 create mode 100644 results/problem8 create mode 100644 results/problem9 diff --git a/results/problem1 b/results/problem1 new file mode 100644 index 000000000..4f04d9b89 --- /dev/null +++ b/results/problem1 @@ -0,0 +1,3 @@ +product_id,product_name,description,price,category_id +15,Mountain Bike,Conquer the trails with this high-performance mountain bike.,1000.0,8 +16,Tennis Racket,Take your tennis game to the next level with this professional-grade racket.,54.0,8 diff --git a/results/problem10 b/results/problem10 new file mode 100644 index 000000000..489324448 --- /dev/null +++ b/results/problem10 @@ -0,0 +1,2 @@ +user_id,username +5,sarahwilson diff --git a/results/problem11 b/results/problem11 new file mode 100644 index 000000000..11ae24abc --- /dev/null +++ b/results/problem11 @@ -0,0 +1,9 @@ +product_id,product_name,category_id,MAX(p.price) +1,Smartphone X,1,500.0 +3,Laptop Pro,2,1200.0 +6,Designer Dress,3,300.0 +7,Coffee Maker,4,80.0 +9,Action Camera,5,200.0 +12,Skincare Set,6,150.0 +14,Weighted Blanket,7,100.0 +15,Mountain Bike,8,1000.0 diff --git a/results/problem12 b/results/problem12 new file mode 100644 index 000000000..9183d6767 --- /dev/null +++ b/results/problem12 @@ -0,0 +1 @@ +user_id,username diff --git a/results/problem2 b/results/problem2 new file mode 100644 index 000000000..562aecd1f --- /dev/null +++ b/results/problem2 @@ -0,0 +1,31 @@ +user_id,username,count(o.order_id) +1,johndoe,1 +2,janesmith,1 +3,maryjones,1 +4,robertbrown,1 +5,sarahwilson,1 +6,michaellee,1 +7,lisawilliams,1 +8,chrisharris,1 +9,emilythompson,1 +10,davidmartinez,1 +11,amandajohnson,1 +12,jasonrodriguez,1 +13,ashleytaylor,1 +14,matthewthomas,1 +15,sophiawalker,1 
+16,jacobanderson,1 +17,olivialopez,1 +18,ethanmiller,1 +19,emilygonzalez,1 +20,williamhernandez,1 +21,sophiawright,1 +22,alexanderhill,1 +23,madisonmoore,1 +24,jamesrogers,1 +25,emilyward,1 +26,benjamincarter,1 +27,gracestewart,1 +28,danielturner,1 +29,elliecollins,1 +30,williamwood,1 \ No newline at end of file diff --git a/results/problem3 b/results/problem3 new file mode 100644 index 000000000..28046f1ea --- /dev/null +++ b/results/problem3 @@ -0,0 +1,17 @@ +product_id,product_name,AVG(r.rating) +1,Smartphone X,5.0 +2,Wireless Headphones,4.0 +3,Laptop Pro,3.0 +4,Smart TV,5.0 +5,Running Shoes,2.0 +6,Designer Dress,4.0 +7,Coffee Maker,5.0 +8,Toaster Oven,3.0 +9,Action Camera,4.0 +10,Board Game Collection,1.0 +11,Yoga Mat,5.0 +12,Skincare Set,4.0 +13,Vitamin C Supplement,2.0 +14,Weighted Blanket,3.0 +15,Mountain Bike,5.0 +16,Tennis Racket,4.0 \ No newline at end of file diff --git a/results/problem4 b/results/problem4 new file mode 100644 index 000000000..dcdd96339 --- /dev/null +++ b/results/problem4 @@ -0,0 +1,6 @@ +user_id,username,sum(o.total_amount) +12,jasonrodriguez,160.0 +4,robertbrown,155.0 +8,chrisharris,150.0 +24,jamesrogers,150.0 +17,olivialopez,145.0 diff --git a/results/problem5 b/results/problem5 new file mode 100644 index 000000000..9c02dee9b --- /dev/null +++ b/results/problem5 @@ -0,0 +1,6 @@ +product_id,product_name,avg_rating +1,Smartphone X,5.0 +4,Smart TV,5.0 +7,Coffee Maker,5.0 +11,Yoga Mat,5.0 +15,Mountain Bike,5.0 diff --git a/results/problem6 b/results/problem6 new file mode 100644 index 000000000..9183d6767 --- /dev/null +++ b/results/problem6 @@ -0,0 +1 @@ +user_id,username diff --git a/results/problem7 b/results/problem7 new file mode 100644 index 000000000..8024ed24e --- /dev/null +++ b/results/problem7 @@ -0,0 +1 @@ +product_id,product_name diff --git a/results/problem8 b/results/problem8 new file mode 100644 index 000000000..9183d6767 --- /dev/null +++ b/results/problem8 @@ -0,0 +1 @@ +user_id,username diff --git a/results/problem9 
b/results/problem9 new file mode 100644 index 000000000..600052885 --- /dev/null +++ b/results/problem9 @@ -0,0 +1,9 @@ +category_name,category_id,q_per_category +Electronics,1,3 +Books,2,4 +Clothing,3,3 +Home & Kitchen,4,4 +Toys & Games,5,3 +Beauty & Personal Care,6,4 +Health & Household,7,3 +Sports & Outdoors,8,4 From 8342f88f2dafe6316a3ac0cca2ac4ee166fdbdff Mon Sep 17 00:00:00 2001 From: kaidenjessani1 <157517498+kaidenjessani1@users.noreply.github.com> Date: Mon, 29 Jan 2024 19:55:06 -0500 Subject: [PATCH 09/20] Delete results/pro --- results/pro | 1 - 1 file changed, 1 deletion(-) delete mode 100644 results/pro diff --git a/results/pro b/results/pro deleted file mode 100644 index 8b1378917..000000000 --- a/results/pro +++ /dev/null @@ -1 +0,0 @@ - From b39bb73fa76b4bda09c90c66ecc85f9be4224ed0 Mon Sep 17 00:00:00 2001 From: kaidenjessani1 <157517498+kaidenjessani1@users.noreply.github.com> Date: Mon, 29 Jan 2024 23:23:56 -0500 Subject: [PATCH 10/20] Delete sql/task1.sql --- sql/task1.sql | 47 ----------------------------------------------- 1 file changed, 47 deletions(-) delete mode 100644 sql/task1.sql diff --git a/sql/task1.sql b/sql/task1.sql deleted file mode 100644 index fcfd6a52e..000000000 --- a/sql/task1.sql +++ /dev/null @@ -1,47 +0,0 @@ --- Problem 1: Retrieve all products in the Sports category --- Write an SQL query to retrieve all products in a specific category. --- Problem 1 -SELECT p.* FROM Products p - LEFT JOIN Categories c - ON p.category_id=c.category_id - WHERE LOWER(c.category_name) like '%sports%' - --- Problem 2: Retrieve the total number of orders for each user --- Write an SQL query to retrieve the total number of orders for each user. --- The result should include the user ID, username, and the total number of orders. 
--- Problem 2 -SELECT u.user_id, u.username, count(o.order_id) FROM Users u - JOIN Orders o - ON u.user_id=o.user_id - GROUP BY u.user_id - --- Problem 3: Retrieve the average rating for each product --- Write an SQL query to retrieve the average rating for each product. --- The result should include the product ID, product name, and the average rating. --- Problem 3 -SELECT r.product_id, p.product_name, AVG(r.rating) FROM Products p - JOIN Reviews r - ON p.product_id=r.product_id - GROUP BY r.product_id - --- Problem 4: Retrieve the top 5 users with the highest total amount spent on orders --- Write an SQL query to retrieve the top 5 users with the highest total amount spent on orders. --- The result should include the user ID, username, and the total amount spent. --- Problem 4 -SELECT u.user_id, u.username, sum(o.total_amount) FROM Users u - JOIN Orders o - ON u.user_id=o.user_id - GROUP BY u.user_id, u.username - ORDER BY 3 DESC - LIMIT 5 - - - - - - - - - - - From c8b378a4e93f4bf394652a895903f301e31352df Mon Sep 17 00:00:00 2001 From: kaidenjessani1 <157517498+kaidenjessani1@users.noreply.github.com> Date: Mon, 29 Jan 2024 23:24:03 -0500 Subject: [PATCH 11/20] Delete sql/task2.sql --- sql/task2.sql | 60 --------------------------------------------------- 1 file changed, 60 deletions(-) delete mode 100644 sql/task2.sql diff --git a/sql/task2.sql b/sql/task2.sql deleted file mode 100644 index 11501e254..000000000 --- a/sql/task2.sql +++ /dev/null @@ -1,60 +0,0 @@ --- Problem 5: Retrieve the products with the highest average rating --- Write an SQL query to retrieve the products with the highest average rating. --- The result should include the product ID, product name, and the average rating. --- Hint: You may need to use subqueries or common table expressions (CTEs) to solve this problem. 
-with -avg_ratings as ( - SELECT p.product_id, p.product_name, AVG(rating) over (PARTITION BY p.product_id) avg_rating FROM Products p - JOIN Reviews r - ON p.product_id=r.product_id -) -SELECT * FROM avg_ratings - WHERE avg_rating = (SELECT MAX(avg_rating) FROM avg_ratings) - --- Problem 6: Retrieve the users who have made at least one order in each category --- Write an SQL query to retrieve the users who have made at least one order in each category. --- The result should include the user ID and username. --- Hint: You may need to use subqueries or joins to solve this problem. -with -tmp AS ( - SELECT COUNT(DISTINCT c.category_id) dist_cat_n, u.user_id, u.username FROM Users u - JOIN Orders o - ON u.user_id=o.user_id - JOIN Order_Items oi - ON o.order_id=oi.order_id - JOIN Products p - ON p.product_id=oi.product_id - JOIN Categories c - ON p.category_id=c.category_id - GROUP BY 2,3 -) -SELECT user_id, username FROM tmp -WHERE dist_cat_n = (SELECT COUNT(DISTINCT category_id) FROM Categories) - --- Problem 7: Retrieve the products that have not received any reviews --- Write an SQL query to retrieve the products that have not received any reviews. --- The result should include the product ID and product name. --- Hint: You may need to use subqueries or left joins to solve this problem. -SELECT product_id, product_name FROM Products -WHERE product_id not in (SELECT DISTINCT product_id FROM Reviews) - --- Problem 8: Retrieve the users who have made consecutive orders on consecutive days --- Write an SQL query to retrieve the users who have made consecutive orders on consecutive days. --- The result should include the user ID and username. --- Hint: You may need to use subqueries or window functions to solve this problem. 
-with orders_with_prev AS ( - SELECT - o.user_id, - u.username, - o.order_date, - LAG(o.order_date,1) OVER (PARTITION BY o.user_id ORDER BY o.order_date) previous_order_date - FROM Orders o - JOIN Users u - ON o.user_id=u.user_id - ORDER BY 1,3 - ) - SELECT - user_id, - username - FROM orders_with_prev - WHERE Cast((JulianDay(order_date) - JulianDay(previous_order_date)) As Integer) = 1 From 045280d4833540439835129af39c72661d0897e4 Mon Sep 17 00:00:00 2001 From: kaidenjessani1 <157517498+kaidenjessani1@users.noreply.github.com> Date: Mon, 29 Jan 2024 23:24:10 -0500 Subject: [PATCH 12/20] Delete sql/task3.sql --- sql/task3.sql | 84 --------------------------------------------------- 1 file changed, 84 deletions(-) delete mode 100644 sql/task3.sql diff --git a/sql/task3.sql b/sql/task3.sql deleted file mode 100644 index dd4e81b67..000000000 --- a/sql/task3.sql +++ /dev/null @@ -1,84 +0,0 @@ --- Problem 9: Retrieve the top 3 categories with the highest total sales amount --- Write an SQL query to retrieve the top 3 categories with the highest total sales amount. --- The result should include the category ID, category name, and the total sales amount. --- Hint: You may need to use subqueries, joins, and aggregate functions to solve this problem. -with tot_q AS ( - SELECT - oi.product_id, - SUM(oi.quantity) total_q_per_prod - FROM Order_Items oi - GROUP BY oi.product_id -) -SELECT c.category_name, c.category_id, SUM(t.total_q_per_prod) q_per_category FROM tot_q t - JOIN Products p - ON t.product_id=p.product_id - JOIN Categories c - ON p.category_id=c.category_id - GROUP BY c.category_id - --- Problem 10: Retrieve the users who have placed orders for all products in the Toys & Games --- Write an SQL query to retrieve the users who have placed orders for all products in the Toys & Games --- The result should include the user ID and username. --- Hint: You may need to use subqueries, joins, and aggregate functions to solve this problem. 
-SELECT u.user_id, u.username FROM Users u - JOIN Orders o - ON u.user_id=o.user_id -WHERE o.order_id IN ( - SELECT oi.order_id FROM Order_Items oi - JOIN Products p - ON oi.product_id=p.product_id - JOIN Categories c - ON p.category_id=c.category_id - WHERE c.category_name LIKE 'Toys & Games' -) - --- Problem 11: Retrieve the products that have the highest price within each category --- Write an SQL query to retrieve the products that have the highest price within each category. --- The result should include the product ID, product name, category ID, and price. --- Hint: You may need to use subqueries, joins, and window functions to solve this problem. -SELECT p.product_id, p.product_name, c.category_id, MAX(p.price) FROM Products p - JOIN Categories c - ON p.category_id=c.category_id - GROUP BY c.category_id - --- Problem 12: Retrieve the users who have placed orders on consecutive days for at least 3 days --- Write an SQL query to retrieve the users who have placed orders on consecutive days for at least 3 days. --- The result should include the user ID and username. --- Hint: You may need to use subqueries, joins, and window functions to solve this problem. 
-with - users_with_date_diff AS ( - SELECT - o.user_id, - u.username, - o.order_date, - LAG(o.order_date,1) OVER (PARTITION BY o.user_id ORDER BY o.order_date) previous_order_date, - LEAD(o.order_date,1) OVER (PARTITION BY o.user_id ORDER BY o.order_date) next_order_date - FROM Orders o - JOIN Users u - ON o.user_id=u.user_id - ORDER BY 1,3 - ), - users_with_date_diff_nominal AS ( - SELECT - user_id, - username, - order_date, - previous_order_date, - next_order_date, - Cast((JulianDay(order_date) - JulianDay(previous_order_date)) As Integer) date_diff_before, - Cast((JulianDay(next_order_date) - JulianDay(order_date)) As Integer) date_diff_after - FROM users_with_date_diff - --WHERE Cast((JulianDay(order_date) - JulianDay(previous_order_date)) As Integer) = 1 - ) -SELECT - user_id, - username -FROM users_with_date_diff_nominal -WHERE date_diff_before&date_diff_after - - - - - - - From a9b44854db317da84826d47d013518e1fab6cc2f Mon Sep 17 00:00:00 2001 From: kaidenjessani1 <157517498+kaidenjessani1@users.noreply.github.com> Date: Mon, 29 Jan 2024 23:24:29 -0500 Subject: [PATCH 13/20] Add files via upload --- sql/task1.sql | 91 ++++++++++++++++++++++++++++++++++++++ sql/task2.sql | 85 ++++++++++++++++++++++++++++++++++++ sql/task3.sql | 118 ++++++++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 294 insertions(+) create mode 100644 sql/task1.sql create mode 100644 sql/task2.sql create mode 100644 sql/task3.sql diff --git a/sql/task1.sql b/sql/task1.sql new file mode 100644 index 000000000..b4a3edf70 --- /dev/null +++ b/sql/task1.sql @@ -0,0 +1,91 @@ +-- Problem 1: +-- Question: Retrieve all products in the Sports category +-- Write an SQL query to retrieve all products in a specific category. +-- +-- Main Query: +-- Selects all columns (p.*) from the 'Products' table. +-- Uses a LEFT JOIN to include all products from the 'Products' table, and matching categories from the 'Categories' table based on the category_id. 
+-- LEFT JOIN: +-- Connects rows from 'Products' to 'Categories' based on the common column category_id. +-- Products that do not have a matching category will still be included in the result with NULL values for category-related columns. +-- WHERE clause: +-- Filters the result to include only rows where the lowercase category_name from the 'Categories' table contains the substring 'sports'. +-- The LIKE operator with % is used for a partial match, and LOWER() is used to perform a case-insensitive comparison. +-- +-- Main query to select products from the 'Products' table based on a left join with 'Categories' +SELECT p.* FROM Products p -- Selects all columns from the 'Products' table + LEFT JOIN Categories c + ON p.category_id=c.category_id ---- Left join with 'Categories' table + WHERE LOWER(c.category_name) like '%sports%' -- Filters products where the category name contains 'sports' in a case-insensitive manner + +-- Problem 2: +-- Question: Retrieve the total number of orders for each user +-- Write an SQL query to retrieve the total number of orders for each user. +-- The result should include the user ID, username, and the total number of orders. +--Main Query: +-- Selects the user_id and username from the 'Users' table. +-- Counts the number of orders for each user using the COUNT() function on the order_id. +-- Uses a JOIN operation to connect rows from 'Users' to 'Orders' based on the common column user_id. +-- JOIN clause: +-- Connects rows from 'Users' to 'Orders' based on the common column user_id. +-- GROUP BY clause: +-- Groups the results by user_id, so the count of orders is calculated for each user separately. 
+-- +-- Main query to count the number of orders for each user +SELECT u.user_id, u.username, count(o.order_id) FROM Users u + JOIN Orders o + ON u.user_id=o.user_id -- Joining Users and Orders tables based on user_id + GROUP BY u.user_id -- Grouping the results by user_id + +-- Problem 3: +-- Question: Retrieve the average rating for each product +-- Write an SQL query to retrieve the average rating for each product. +-- The result should include the product ID, product name, and the average rating. +-- Main Query: +-- Selects the product_id and product_name from the 'Products' table. +-- Calculates the average rating for each product using the AVG() function on the rating column from the 'Reviews' table. +-- Uses a JOIN operation to connect rows from 'Products' to 'Reviews' based on the common column product_id. +-- JOIN clause: +-- Connects rows from 'Products' to 'Reviews' based on the common column product_id. +-- GROUP BY clause: +-- Groups the results by product_id, so the average rating is calculated for each product separately. +-- +-- Main query to calculate the average rating for each product based on reviews +SELECT r.product_id, p.product_name, AVG(r.rating) FROM Products p + JOIN Reviews r + ON p.product_id=r.product_id -- Joining Products and Reviews tables based on product_id + GROUP BY r.product_id -- Grouping the results by product_id + +-- Problem 4: Retrieve the top 5 users with the highest total amount spent on orders +-- Write an SQL query to retrieve the top 5 users with the highest total amount spent on orders. +-- The result should include the user ID, username, and the total amount spent. +-- +-- Main Query: +-- Selects the user_id and username from the 'Users' table. +-- Calculates the total amount spent by each user using the SUM() function on the total_amount column from the 'Orders' table. +-- Uses a JOIN operation to connect rows from 'Users' to 'Orders' based on the common column user_id. 
+-- JOIN clause: +-- Connects rows from 'Users' to 'Orders' based on the common column user_id. +-- +-- Groups the results by user_id and username, so the total amount is calculated for each user separately. +-- Orders the results in descending order based on the total amount (3 represents the position of the SUM(o.total_amount) expression in the SELECT list). +-- Limits the result set to the top 5 users by total amount spent. +-- +-- Main query to calculate the total amount spent by each user and limit the result to the top 5 users by total amount +SELECT u.user_id, u.username, sum(o.total_amount) FROM Users u + JOIN Orders o + ON u.user_id=o.user_id -- Joining Users and Orders tables based on user_id + GROUP BY u.user_id, u.username -- Grouping the results by user_id and username + ORDER BY 3 DESC -- Ordering the results in descending order based on total amount + LIMIT 5 -- Limit the result to the top 5 users by total amount + + + + + + + + + + + diff --git a/sql/task2.sql b/sql/task2.sql new file mode 100644 index 000000000..2905dc81e --- /dev/null +++ b/sql/task2.sql @@ -0,0 +1,85 @@ +-- Problem 5: +-- Question: Retrieve the products with the highest average rating +-- Write an SQL query to retrieve the products with the highest average rating. +-- The result should include the product ID, product name, and the average rating. +-- Hint: You may need to use subqueries or common table expressions (CTEs) to solve this problem. +-- +-- avg_rating CTE Calculates the average ratings for each product using the AVG() function +-- The PARTITION BY p.product_id ensures that the average is calculated for each product individually. +WITH +avg_rating AS ( + -- Results to display product ID, product name, and the average rating + SELECT p.product_id, p.product_name, AVG(rating) over (PARTITION BY p.product_id) avg_rating FROM Products p + JOIN Reviews r + ON p.product_id=r.product_id +) +--Selects all columns from the AvgRating CTE. 
+SELECT * FROM avg_rating + WHERE avg_rating = (SELECT MAX(avg_rating) FROM avg_rating) --include only rows where the avg_rating is equal to the maximum average rating + +-- Problem 6: Retrieve the users who have made at least one order in each category +-- Write an SQL query to retrieve the users who have made at least one order in each category. +-- The result should include the user ID and username. +-- Hint: You may need to use subqueries or joins to solve this problem. +-- CTE DistinctCategories Calculates the count of distinct categories each user has ordered from. +WITH +DistinctCategories AS ( + SELECT COUNT(DISTINCT c.category_id) dist_cat_n, u.user_id, u.username FROM Users u + JOIN Orders o --Joins the Users, Orders, Order_Items, Products, and Categories tables + ON u.user_id=o.user_id + JOIN Order_Items oi + ON o.order_id=oi.order_id + JOIN Products p + ON p.product_id=oi.product_id + JOIN Categories c + ON p.category_id=c.category_id + GROUP BY 2,3 -- group the results by user_id and username +) +SELECT user_id, username FROM DistinctCategories +WHERE dist_cat_n = (SELECT COUNT(DISTINCT category_id) FROM Categories) --filters the results to include only users who have ordered from all distinct categories + +-- Problem 7: +-- Question: Retrieve the products that have not received any reviews +-- Write an SQL query to retrieve the products that have not received any reviews. +-- The result should include the product ID and product name. +-- Hint: You may need to use subqueries or left joins to solve this problem. +-- +-- The Main Query Selects product_id and product_name from the Products table. +-- The WHERE clause filters the results to include only products whose product_id is not present in the subquery. 
+-- +-- Main query to select products that have no reviews +SELECT product_id, product_name FROM Products +WHERE product_id not in (SELECT DISTINCT product_id FROM Reviews) -- Subquery to get distinct product IDs from the Reviews table + +-- Problem 8: +-- Question: Retrieve the users who have made consecutive orders on consecutive days +-- Write an SQL query to retrieve the users who have made consecutive orders on consecutive days. +-- The result should include the user ID and username. +-- Hint: You may need to use subqueries or window functions to solve this problem. +-- +-- CTE orders_with_prev: +-- Calculates the previous order date for each order of each user. +-- The LAG window function is used to obtain the previous order date based on the order date for each user. +-- Main Query: +-- Selects user_id and username from the orders_with_prev CTE. +-- The WHERE clause filters the results to include only users whose orders are on consecutive days, +-- The CAST function is used to convert the Julian day differences to integers. 
+-- +-- CTE named 'orders_with_prev' to calculate the previous order date for each order +WITH orders_with_prev AS ( + SELECT + o.user_id, + u.username, + o.order_date, + LAG(o.order_date,1) OVER (PARTITION BY o.user_id ORDER BY o.order_date) previous_order_date + FROM Orders o + JOIN Users u + ON o.user_id=u.user_id + ORDER BY 1,3 -- Order by user_id and order_date + ) +-- Main query to select user_id and username from the 'orders_with_prev' CTE + SELECT + user_id, + username + FROM orders_with_prev + WHERE Cast((JulianDay(order_date) - JulianDay(previous_order_date)) As Integer) = 1 -- Filter users with consecutive orders \ No newline at end of file diff --git a/sql/task3.sql b/sql/task3.sql new file mode 100644 index 000000000..95c86d9ac --- /dev/null +++ b/sql/task3.sql @@ -0,0 +1,118 @@ +-- Problem 9: +-- Question: Retrieve the top 3 categories with the highest total sales amount +-- Write an SQL query to retrieve the top 3 categories with the highest total sales amount. +-- The result should include the category ID, category name, and the total sales amount. +-- Hint: You may need to use subqueries, joins, and aggregate functions to solve this problem. +-- +-- CTE TotalSalesPerCategory calculates the total sales for each category +-- Joins Categories, Products, Order_Items and Orders tables. +-- COALESCE turns the NULL sum of a category with no sales into 0; the LEFT JOINs keep such categories in the result +-- orders the result by total_sales_amount in descending order and limits the result to the top 3 categories. 
+WITH TotalSalesPerCategory AS ( + SELECT + c.category_name, + c.category_id, + COALESCE(SUM(oi.quantity * oi.unit_price), 0) AS total_sales_amount + FROM + Categories c + LEFT JOIN Products p ON c.category_id = p.category_id --join Products and Categories tables + LEFT JOIN Order_Items oi ON p.product_id = oi.product_id --join Products and Order_Items tables + LEFT JOIN Orders o ON oi.order_id = o.order_id --join Orders and Order_Items tables + GROUP BY + c.category_id, c.category_name +) +SELECT + --Display category ID, category name, and total sales amount + cts.category_id, + cts.category_name, + cts.total_sales_amount +FROM + TotalSalesPerCategory cts +ORDER BY + cts.total_sales_amount DESC --orders the result by total_sales_amount in descending order +LIMIT 3; --Need the top 3 categories. + +-- Problem 10: +-- Question: Retrieve the users who have placed orders for all products in the Toys & Games +-- Write an SQL query to retrieve the users who have placed orders for all products in the Toys & Games +-- The result should include the user ID and username. +-- Hint: You may need to use subqueries, joins, and aggregate functions to solve this problem. +-- +-- CTE ProductsInToysAndGames lists the product_id of every product in the 'Toys & Games' category +-- Join the Users, Orders, Order_Items, and the ProductsInToysAndGames CTE to identify users who have ordered products in the "Toys & Games" category. 
+-- +WITH ProductsInToysAndGames AS ( + SELECT + p.product_id + FROM + Categories c + JOIN Products p ON c.category_id = p.category_id -- join the Products and the Category Table + WHERE + c.category_name = 'Toys & Games' +) +SELECT + -- Display the user_id and username + u.user_id, + u.username +FROM + Users u + JOIN Orders o ON u.user_id = o.user_id -- join the user and Orders table + JOIN Order_Items oi ON o.order_id = oi.order_id -- Joins the order_items and Orders Table + JOIN ProductsInToysAndGames ptg ON oi.product_id = ptg.product_id -- Joins the cte to the order_items table +GROUP BY + u.user_id, u.username +HAVING + COUNT(DISTINCT ptg.product_id) = (SELECT COUNT(*) FROM ProductsInToysAndGames); -- Ensure the count of distinct product_ids equals the count of the number of products in the Toys and Games category. + +-- Problem 11: +-- Question: Retrieve the products that have the highest price within each category +-- Write an SQL query to retrieve the products that have the highest price within each category. +-- The result should include the product ID, product name, category ID, and price. +-- Hint: You may need to use subqueries, joins, and window functions to solve this problem. +-- +-- Select the product ID, product name, category ID, and the maximum price within each category. +SELECT p.product_id, p.product_name, c.category_id, MAX(p.price) FROM Products p + JOIN Categories c + ON p.category_id=c.category_id -- join the Products and Category Tables + GROUP BY c.category_id -- Maximum needs to be determined for each category + +-- Problem 12: Retrieve the users who have placed orders on consecutive days for at least 3 days +-- Write an SQL query to retrieve the users who have placed orders on consecutive days for at least 3 days. +-- The result should include the user ID and username. +-- Hint: You may need to use subqueries, joins, and window functions to solve this problem. 
+-- +--The UsersWithDateDiff CTE uses the LEAD and LAG functions to get the next and previous order dates for each user. +--This is used to check for consecutive days. +-- A second CTE (UsersWithDateDiffNominal) is used to calculates the date differences before and after each order. +-- The CAST function is used to convert the Julian day differences to integers. +-- The WHERE clause filters the results to include only users whose orders are on consecutive days, as indicated by date_diff_before = 1 AND date_diff_after = 1. +WITH + UsersWithDateDiff AS ( + SELECT + o.user_id, + u.username, + o.order_date, + LAG(o.order_date,1) OVER (PARTITION BY o.user_id ORDER BY o.order_date) previous_order_date, --Get previous order date + LEAD(o.order_date,1) OVER (PARTITION BY o.user_id ORDER BY o.order_date) next_order_date --Get Next order date + FROM Orders o + JOIN Users u + ON o.user_id=u.user_id -- Join Users and Orders Table + ORDER BY 1,3 + ), + UsersWithDateDiffNominal AS ( + SELECT + user_id, + username, + order_date, + previous_order_date, + next_order_date, + Cast((JulianDay(order_date) - JulianDay(previous_order_date)) As Integer) date_diff_before, -- Use Julian for simplicity + Cast((JulianDay(next_order_date) - JulianDay(order_date)) As Integer) date_diff_after + FROM UsersWithDateDiff + --WHERE Cast((JulianDay(order_date) - JulianDay(previous_order_date)) As Integer) = 1 + ) +SELECT + user_id, --Result displays User_id and Username + username +FROM UsersWithDateDiffNominal +WHERE date_diff_before&date_diff_after \ No newline at end of file From 18d67ad79e1cf5819c53ca0b7180170bf45ce1dd Mon Sep 17 00:00:00 2001 From: kaidenjessani1 <157517498+kaidenjessani1@users.noreply.github.com> Date: Mon, 29 Jan 2024 23:25:58 -0500 Subject: [PATCH 14/20] Update task1.sql --- sql/task1.sql | 3 +++ 1 file changed, 3 insertions(+) diff --git a/sql/task1.sql b/sql/task1.sql index b4a3edf70..16561ca35 100644 --- a/sql/task1.sql +++ b/sql/task1.sql @@ -18,6 +18,7 @@ SELECT p.* FROM 
Products p -- Selects all columns from the 'Products' table ON p.category_id=c.category_id ---- Left join with 'Categories' table WHERE LOWER(c.category_name) like '%sports%' -- Filters products where the category name contains 'sports' in a case-insensitive manner +-- ************************************************************************************************************************************************************* -- Problem 2: -- Question: Retrieve the total number of orders for each user -- Write an SQL query to retrieve the total number of orders for each user. @@ -37,6 +38,7 @@ SELECT u.user_id, u.username, count(o.order_id) FROM Users u ON u.user_id=o.user_id -- Joining Users and Orders tables based on user_id GROUP BY u.user_id -- Grouping the results by user_id +-- ************************************************************************************************************************************************************* -- Problem 3: -- Question: Retrieve the average rating for each product -- Write an SQL query to retrieve the average rating for each product. @@ -56,6 +58,7 @@ SELECT r.product_id, p.product_name, AVG(r.rating) FROM Products p ON p.product_id=r.product_id -- Joining Products and Reviews tables based on product_id GROUP BY r.product_id -- Grouping the results by product_id +-- ************************************************************************************************************************************************************* -- Problem 4: Retrieve the top 5 users with the highest total amount spent on orders -- Write an SQL query to retrieve the top 5 users with the highest total amount spent on orders. -- The result should include the user ID, username, and the total amount spent. 
From 67943d454b1806618efa85c6ce031d659f542d5f Mon Sep 17 00:00:00 2001 From: kaidenjessani1 <157517498+kaidenjessani1@users.noreply.github.com> Date: Mon, 29 Jan 2024 23:26:26 -0500 Subject: [PATCH 15/20] Update task2.sql --- sql/task2.sql | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/sql/task2.sql b/sql/task2.sql index 2905dc81e..0924d3f3a 100644 --- a/sql/task2.sql +++ b/sql/task2.sql @@ -17,6 +17,7 @@ avg_rating AS ( SELECT * FROM avg_rating WHERE avg_rating = (SELECT MAX(avg_rating) FROM avg_rating) --include only rows where the avg_rating is equal to the maximum average rating +-- ************************************************************************************************************************************************************* -- Problem 6: Retrieve the users who have made at least one order in each category -- Write an SQL query to retrieve the users who have made at least one order in each category. -- The result should include the user ID and username. @@ -38,6 +39,7 @@ DistinctCategories AS ( SELECT user_id, username FROM DistinctCategories WHERE dist_cat_n = (SELECT COUNT(DISTINCT category_id) FROM Categories) --filters the results to include only users who have ordered from all distinct categories +-- ************************************************************************************************************************************************************* -- Problem 7: -- Question: Retrieve the products that have not received any reviews -- Write an SQL query to retrieve the products that have not received any reviews. 
@@ -51,6 +53,7 @@ WHERE dist_cat_n = (SELECT COUNT(DISTINCT category_id) FROM Categories) --filter SELECT product_id, product_name FROM Products WHERE product_id not in (SELECT DISTINCT product_id FROM Reviews) -- Subquery to get distinct product IDs from the Reviews table +-- ************************************************************************************************************************************************************* -- Problem 8: -- Question: Retrieve the users who have made consecutive orders on consecutive days -- Write an SQL query to retrieve the users who have made consecutive orders on consecutive days. @@ -82,4 +85,4 @@ WITH orders_with_prev AS ( user_id, username FROM orders_with_prev - WHERE Cast((JulianDay(order_date) - JulianDay(previous_order_date)) As Integer) = 1 -- Filter users with consecutive orders \ No newline at end of file + WHERE Cast((JulianDay(order_date) - JulianDay(previous_order_date)) As Integer) = 1 -- Filter users with consecutive orders From c6a56e5389c6c744772e502ebe58e520d3e0992b Mon Sep 17 00:00:00 2001 From: kaidenjessani1 <157517498+kaidenjessani1@users.noreply.github.com> Date: Mon, 29 Jan 2024 23:26:58 -0500 Subject: [PATCH 16/20] Update task3.sql --- sql/task3.sql | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/sql/task3.sql b/sql/task3.sql index 95c86d9ac..d71af8d7a 100644 --- a/sql/task3.sql +++ b/sql/task3.sql @@ -32,6 +32,7 @@ ORDER BY cts.total_sales_amount DESC --orders the result by total_sales_amount in descending order LIMIT 3; --Need the top 3 categories. 
+-- ************************************************************************************************************************************************************* -- Problem 10: -- Question: Retrieve the users who have placed orders for all products in the Toys & Games -- Write an SQL query to retrieve the users who have placed orders for all products in the Toys & Games @@ -64,6 +65,7 @@ GROUP BY HAVING COUNT(DISTINCT ptg.product_id) = (SELECT COUNT(*) FROM ProductsInToysAndGames); -- Ensure the count of distinct product_ids equals the count of the number of products in the Toys and Games category. +-- ************************************************************************************************************************************************************* -- Problem 11: -- Question: Retrieve the products that have the highest price within each category -- Write an SQL query to retrieve the products that have the highest price within each category. @@ -76,6 +78,7 @@ SELECT p.product_id, p.product_name, c.category_id, MAX(p.price) FROM Products p ON p.category_id=c.category_id -- join the Products and Category Tables GROUP BY c.category_id -- Maximum needs to be determined for each category +-- ************************************************************************************************************************************************************* -- Problem 12: Retrieve the users who have placed orders on consecutive days for at least 3 days -- Write an SQL query to retrieve the users who have placed orders on consecutive days for at least 3 days. -- The result should include the user ID and username. 
@@ -115,4 +118,4 @@ SELECT user_id, --Result displays User_id and Username username FROM UsersWithDateDiffNominal -WHERE date_diff_before&date_diff_after \ No newline at end of file +WHERE date_diff_before = 1 AND date_diff_after = 1 -- previous and next order are each exactly one day away, i.e. three consecutive days (the old bitwise & also accepted gaps such as 3 and 1) From 297cb4c10b18f99ded6df751026aadb24a781087 Mon Sep 17 00:00:00 2001 From: kaidenjessani1 <157517498+kaidenjessani1@users.noreply.github.com> Date: Mon, 29 Jan 2024 23:27:17 -0500 Subject: [PATCH 17/20] Delete results/problem9 --- results/problem9 | 9 --------- 1 file changed, 9 deletions(-) delete mode 100644 results/problem9 diff --git a/results/problem9 b/results/problem9 deleted file mode 100644 index 600052885..000000000 --- a/results/problem9 +++ /dev/null @@ -1,9 +0,0 @@ -category_name,category_id,q_per_category -Electronics,1,3 -Books,2,4 -Clothing,3,3 -Home & Kitchen,4,4 -Toys & Games,5,3 -Beauty & Personal Care,6,4 -Health & Household,7,3 -Sports & Outdoors,8,4 From 1d62d58d4118f3d1800e4c3f18e59821605e6e76 Mon Sep 17 00:00:00 2001 From: kaidenjessani1 <157517498+kaidenjessani1@users.noreply.github.com> Date: Mon, 29 Jan 2024 23:27:38 -0500 Subject: [PATCH 18/20] Add files via upload --- results/problem9 | 4 ++++ 1 file changed, 4 insertions(+) create mode 100644 results/problem9 diff --git a/results/problem9 b/results/problem9 new file mode 100644 index 000000000..0b2c79c81 --- /dev/null +++ b/results/problem9 @@ -0,0 +1,4 @@ +category_id,category_name,total_sales_amount +8,Sports & Outdoors,155.0 +4,Home & Kitchen,145.0 +1,Electronics,125.0 From 199020fed659136cfc1bebddc9fad49a27f0c3b0 Mon Sep 17 00:00:00 2001 From: kaidenjessani1 <157517498+kaidenjessani1@users.noreply.github.com> Date: Mon, 29 Jan 2024 23:28:46 -0500 Subject: [PATCH 19/20] Create README_FINAL --- README_FINAL | 1 + 1 file changed, 1 insertion(+) create mode 100644 README_FINAL diff --git a/README_FINAL b/README_FINAL new file mode 100644 index 000000000..8b1378917 --- /dev/null +++ b/README_FINAL @@ -0,0 +1 @@ + From 51125cb38a20848a611a76f57f68fdbabbf480aa Mon Sep
17 00:00:00 2001 From: kaidenjessani1 <157517498+kaidenjessani1@users.noreply.github.com> Date: Mon, 29 Jan 2024 23:31:14 -0500 Subject: [PATCH 20/20] Update README_FINAL --- README_FINAL | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/README_FINAL b/README_FINAL index 8b1378917..57cbcfa5f 100644 --- a/README_FINAL +++ b/README_FINAL @@ -1 +1,3 @@ - +The "create_and_populate_database.py" program creates and populates a SQL database. +A results folder has been created. It has the expected results for each of the problems. +The "test_sql_queries.py" program executes the three SQL files, and compares the expected results against the actual results. The outcome is displayed in the console.