diff --git a/README_FINAL b/README_FINAL new file mode 100644 index 000000000..57cbcfa5f --- /dev/null +++ b/README_FINAL @@ -0,0 +1,3 @@ +The "create_and_populate_database.py" program creates and populates a SQL database. +A results folder has been created. It has the expected results for each of the problems. +The "test_sql_queries.py" program executes the three SQL files, and compares the expected results against the actual results. The outcome is displayed in the console. diff --git a/results/problem1 b/results/problem1 new file mode 100644 index 000000000..4f04d9b89 --- /dev/null +++ b/results/problem1 @@ -0,0 +1,3 @@ +product_id,product_name,description,price,category_id +15,Mountain Bike,Conquer the trails with this high-performance mountain bike.,1000.0,8 +16,Tennis Racket,Take your tennis game to the next level with this professional-grade racket.,54.0,8 diff --git a/results/problem10 b/results/problem10 new file mode 100644 index 000000000..489324448 --- /dev/null +++ b/results/problem10 @@ -0,0 +1,2 @@ +user_id,username +5,sarahwilson diff --git a/results/problem11 b/results/problem11 new file mode 100644 index 000000000..11ae24abc --- /dev/null +++ b/results/problem11 @@ -0,0 +1,9 @@ +product_id,product_name,category_id,MAX(p.price) +1,Smartphone X,1,500.0 +3,Laptop Pro,2,1200.0 +6,Designer Dress,3,300.0 +7,Coffee Maker,4,80.0 +9,Action Camera,5,200.0 +12,Skincare Set,6,150.0 +14,Weighted Blanket,7,100.0 +15,Mountain Bike,8,1000.0 diff --git a/results/problem12 b/results/problem12 new file mode 100644 index 000000000..9183d6767 --- /dev/null +++ b/results/problem12 @@ -0,0 +1 @@ +user_id,username diff --git a/results/problem2 b/results/problem2 new file mode 100644 index 000000000..562aecd1f --- /dev/null +++ b/results/problem2 @@ -0,0 +1,31 @@ +user_id,username,count(o.order_id) +1,johndoe,1 +2,janesmith,1 +3,maryjones,1 +4,robertbrown,1 +5,sarahwilson,1 +6,michaellee,1 +7,lisawilliams,1 +8,chrisharris,1 +9,emilythompson,1 +10,davidmartinez,1 
+11,amandajohnson,1 +12,jasonrodriguez,1 +13,ashleytaylor,1 +14,matthewthomas,1 +15,sophiawalker,1 +16,jacobanderson,1 +17,olivialopez,1 +18,ethanmiller,1 +19,emilygonzalez,1 +20,williamhernandez,1 +21,sophiawright,1 +22,alexanderhill,1 +23,madisonmoore,1 +24,jamesrogers,1 +25,emilyward,1 +26,benjamincarter,1 +27,gracestewart,1 +28,danielturner,1 +29,elliecollins,1 +30,williamwood,1 \ No newline at end of file diff --git a/results/problem3 b/results/problem3 new file mode 100644 index 000000000..28046f1ea --- /dev/null +++ b/results/problem3 @@ -0,0 +1,17 @@ +product_id,product_name,AVG(r.rating) +1,Smartphone X,5.0 +2,Wireless Headphones,4.0 +3,Laptop Pro,3.0 +4,Smart TV,5.0 +5,Running Shoes,2.0 +6,Designer Dress,4.0 +7,Coffee Maker,5.0 +8,Toaster Oven,3.0 +9,Action Camera,4.0 +10,Board Game Collection,1.0 +11,Yoga Mat,5.0 +12,Skincare Set,4.0 +13,Vitamin C Supplement,2.0 +14,Weighted Blanket,3.0 +15,Mountain Bike,5.0 +16,Tennis Racket,4.0 \ No newline at end of file diff --git a/results/problem4 b/results/problem4 new file mode 100644 index 000000000..dcdd96339 --- /dev/null +++ b/results/problem4 @@ -0,0 +1,6 @@ +user_id,username,sum(o.total_amount) +12,jasonrodriguez,160.0 +4,robertbrown,155.0 +8,chrisharris,150.0 +24,jamesrogers,150.0 +17,olivialopez,145.0 diff --git a/results/problem5 b/results/problem5 new file mode 100644 index 000000000..9c02dee9b --- /dev/null +++ b/results/problem5 @@ -0,0 +1,6 @@ +product_id,product_name,avg_rating +1,Smartphone X,5.0 +4,Smart TV,5.0 +7,Coffee Maker,5.0 +11,Yoga Mat,5.0 +15,Mountain Bike,5.0 diff --git a/results/problem6 b/results/problem6 new file mode 100644 index 000000000..9183d6767 --- /dev/null +++ b/results/problem6 @@ -0,0 +1 @@ +user_id,username diff --git a/results/problem7 b/results/problem7 new file mode 100644 index 000000000..8024ed24e --- /dev/null +++ b/results/problem7 @@ -0,0 +1 @@ +product_id,product_name diff --git a/results/problem8 b/results/problem8 new file mode 100644 index 000000000..9183d6767 
--- /dev/null +++ b/results/problem8 @@ -0,0 +1 @@ +user_id,username diff --git a/results/problem9 b/results/problem9 new file mode 100644 index 000000000..0b2c79c81 --- /dev/null +++ b/results/problem9 @@ -0,0 +1,4 @@ +category_id,category_name,total_sales_amount +8,Sports & Outdoors,155.0 +4,Home & Kitchen,145.0 +1,Electronics,125.0 diff --git a/sql/task1.sql b/sql/task1.sql index 90de336ca..16561ca35 100644 --- a/sql/task1.sql +++ b/sql/task1.sql @@ -1,14 +1,94 @@ --- Problem 1: Retrieve all products in the Sports category +-- Problem 1: +-- Question: Retrieve all products in the Sports category -- Write an SQL query to retrieve all products in a specific category. +-- +-- Main Query: +-- Selects all columns (p.*) from the 'Products' table. +-- Uses a LEFT JOIN to include all products from the 'Products' table, and matching categories from the 'Categories' table based on the category_id. +-- LEFT JOIN: +-- Connects rows from 'Products' to 'Categories' based on the common column category_id. +-- Products that do not have a matching category will still be included in the result with NULL values for category-related columns. +-- WHERE clause: +-- Filters the result to include only rows where the lowercase category_name from the 'Categories' table contains the substring 'sports'. +-- The LIKE operator with % is used for a partial match, and LOWER() is used to perform a case-insensitive comparison. 
-- Main query: every column of each product whose category name contains
-- 'sports' (case-insensitive, via LOWER() + LIKE).
-- An INNER JOIN is used on purpose: the WHERE clause filters on
-- c.category_name, so a product without a matching category could never
-- pass the filter anyway (a LEFT JOIN here would silently behave as an
-- inner join).
-- ORDER BY makes the row order deterministic; the test harness compares
-- rows position by position.
SELECT p.*
FROM Products AS p
INNER JOIN Categories AS c
    ON p.category_id = c.category_id
WHERE LOWER(c.category_name) LIKE '%sports%'
ORDER BY p.product_id
-- Orders per user: pair each user with their orders and count the order
-- rows inside every user group. Users without orders are not listed
-- (inner join). The aggregate expression is left un-aliased because its
-- text is what the result column is called.
SELECT
    u.user_id,
    u.username,
    count(o.order_id)
FROM Users AS u
INNER JOIN Orders AS o
    ON o.user_id = u.user_id
GROUP BY u.user_id

-- *************************************************************************************************************************************************************
-- Problem 3:
-- Question: Retrieve the average rating for each product
-- The result should include the product ID, product name, and the average rating.
-- Each product is paired with its reviews and the ratings are averaged
-- per product. Products without reviews are not listed (inner join).
SELECT
    r.product_id,
    p.product_name,
    AVG(r.rating)
FROM Products AS p
INNER JOIN Reviews AS r
    ON r.product_id = p.product_id
GROUP BY r.product_id
-- The result should include the user ID, username, and the total amount spent.
--
-- Main query: sum Orders.total_amount per user and keep the five largest
-- totals.
--   * JOIN links each user to their orders through user_id.
--   * GROUP BY user_id, username yields one total per user.
--   * ORDER BY sorts by the total, largest first; u.user_id is a secondary
--     key so that users with equal totals always come out in the same
--     order (without it, which tied users survive LIMIT 5, and in what
--     order, is unspecified).
--   * The aggregate is left un-aliased because its text is the name of the
--     result column.
SELECT u.user_id, u.username, sum(o.total_amount)
FROM Users AS u
JOIN Orders AS o
    ON u.user_id = o.user_id
GROUP BY u.user_id, u.username
ORDER BY sum(o.total_amount) DESC, u.user_id ASC
LIMIT 5
--
-- The avg_rating CTE computes one average rating per product.
-- GROUP BY is used instead of AVG() OVER (PARTITION BY ...): the window
-- form keeps one output row per review, so a product with several reviews
-- would be listed several times in the final result.
WITH avg_rating AS (
    SELECT
        p.product_id,
        p.product_name,
        AVG(r.rating) AS avg_rating
    FROM Products AS p
    JOIN Reviews AS r
        ON p.product_id = r.product_id
    GROUP BY p.product_id, p.product_name
)
-- Keep only the products whose average equals the overall maximum average
-- (ties are all returned), in a deterministic order.
SELECT product_id, product_name, avg_rating
FROM avg_rating
WHERE avg_rating = (SELECT MAX(avg_rating) FROM avg_rating)
ORDER BY product_id
-- Users who have ordered from every category.
-- Users are joined through Orders, Order_Items and Products to Categories;
-- a user qualifies when the number of distinct categories reached this way
-- equals the number of distinct categories that exist.
SELECT
    u.user_id,
    u.username
FROM Users AS u
INNER JOIN Orders AS o
    ON o.user_id = u.user_id
INNER JOIN Order_Items AS oi
    ON oi.order_id = o.order_id
INNER JOIN Products AS p
    ON p.product_id = oi.product_id
INNER JOIN Categories AS c
    ON c.category_id = p.category_id
GROUP BY u.user_id, u.username
HAVING COUNT(DISTINCT c.category_id) = (SELECT COUNT(DISTINCT category_id) FROM Categories)
--
-- Main query: products for which no review row exists.
-- NOT EXISTS is used rather than NOT IN (subquery): if the subquery ever
-- yields a NULL product_id, "x NOT IN (...)" evaluates to NULL for every
-- row and the query returns nothing at all.
SELECT p.product_id, p.product_name
FROM Products AS p
WHERE NOT EXISTS (
    SELECT 1
    FROM Reviews AS r
    WHERE r.product_id = p.product_id
)
ORDER BY p.product_id
--
-- CTE orders_with_prev: for every order, LAG() fetches the same user's
-- previous order date (NULL for the user's first order).
-- No ORDER BY is placed inside the CTE: ordering there does not
-- guarantee the order of the final result.
WITH orders_with_prev AS (
    SELECT
        o.user_id,
        u.username,
        o.order_date,
        LAG(o.order_date, 1) OVER (PARTITION BY o.user_id ORDER BY o.order_date) AS previous_order_date
    FROM Orders AS o
    JOIN Users AS u
        ON o.user_id = u.user_id
)
-- Main query: a user qualifies when some order falls exactly one day after
-- the previous one. DISTINCT lists each user once even when they have
-- several such pairs. A NULL previous date makes the comparison NULL, so
-- first orders are skipped.
SELECT DISTINCT
    user_id,
    username
FROM orders_with_prev
WHERE CAST(JulianDay(order_date) - JulianDay(previous_order_date) AS INTEGER) = 1
ORDER BY user_id
-- Total sales (quantity * unit_price) per category, largest first, top 3.
-- The LEFT JOINs keep categories that have no products or no sold items;
-- COALESCE reports their total as 0 instead of NULL (it does not remove
-- them from the result).
SELECT
    c.category_id,
    c.category_name,
    COALESCE(SUM(oi.quantity * oi.unit_price), 0) AS total_sales_amount
FROM Categories AS c
LEFT JOIN Products AS p
    ON p.category_id = c.category_id
LEFT JOIN Order_Items AS oi
    ON oi.product_id = p.product_id
LEFT JOIN Orders AS o
    ON o.order_id = oi.order_id
GROUP BY c.category_id, c.category_name
ORDER BY total_sales_amount DESC
LIMIT 3;
--
-- toy_products: ids of the products that belong to the 'Toys & Games'
-- category.
-- toy_buyers: per user, how many distinct products of that category appear
-- in their order items.
-- A user qualifies when that number equals the size of toy_products, i.e.
-- they have ordered every product of the category.
WITH toy_products AS (
    SELECT p.product_id
    FROM Products AS p
    INNER JOIN Categories AS c
        ON c.category_id = p.category_id
    WHERE c.category_name = 'Toys & Games'
),
toy_buyers AS (
    SELECT
        u.user_id,
        u.username,
        COUNT(DISTINCT tp.product_id) AS toys_bought
    FROM Users AS u
    INNER JOIN Orders AS o
        ON o.user_id = u.user_id
    INNER JOIN Order_Items AS oi
        ON oi.order_id = o.order_id
    INNER JOIN toy_products AS tp
        ON tp.product_id = oi.product_id
    GROUP BY u.user_id, u.username
)
SELECT
    user_id,
    username
FROM toy_buyers
WHERE toys_bought = (SELECT COUNT(*) FROM toy_products);
-- Most expensive product(s) of every category.
-- RANK() numbers the products of each category by price, highest first;
-- rank 1 is the category maximum, and products tied for that price all get
-- rank 1 and are all returned.
-- This replaces "SELECT product columns, MAX(price) ... GROUP BY category",
-- which depends on SQLite's bare-column behaviour for MAX() and returns
-- only one arbitrary product when several share the top price.
-- The price column keeps the label "MAX(p.price)" so the result header is
-- unchanged for consumers of the previous query.
WITH ranked_products AS (
    SELECT
        p.product_id,
        p.product_name,
        c.category_id,
        p.price,
        RANK() OVER (PARTITION BY c.category_id ORDER BY p.price DESC) AS price_rank
    FROM Products AS p
    JOIN Categories AS c
        ON p.category_id = c.category_id
)
SELECT
    product_id,
    product_name,
    category_id,
    price AS "MAX(p.price)"
FROM ranked_products
WHERE price_rank = 1
ORDER BY category_id, product_id
-- UsersWithDateDiff: for every order, LAG()/LEAD() fetch the same user's
-- previous and next order dates (NULL at either end of the user's history).
WITH
    UsersWithDateDiff AS (
        SELECT
            o.user_id,
            u.username,
            o.order_date,
            LAG(o.order_date, 1) OVER (PARTITION BY o.user_id ORDER BY o.order_date) AS previous_order_date,
            LEAD(o.order_date, 1) OVER (PARTITION BY o.user_id ORDER BY o.order_date) AS next_order_date
        FROM Orders AS o
        JOIN Users AS u
            ON o.user_id = u.user_id
    ),
    -- UsersWithDateDiffNominal: whole-day gaps to the previous and to the
    -- next order (NULL when there is no such neighbour).
    UsersWithDateDiffNominal AS (
        SELECT
            user_id,
            username,
            CAST(JulianDay(order_date) - JulianDay(previous_order_date) AS INTEGER) AS date_diff_before,
            CAST(JulianDay(next_order_date) - JulianDay(order_date) AS INTEGER) AS date_diff_after
        FROM UsersWithDateDiff
    )
-- An order sits in the middle of a three-day streak when BOTH gaps are
-- exactly one day. The two gaps must be compared with "= 1 AND ... = 1":
-- the bitwise form "date_diff_before & date_diff_after" is also non-zero
-- for gaps such as 3 and 1, or 2 and 2.
-- DISTINCT lists a user once even if they have several streaks.
-- NOTE(review): several orders by one user on the same day produce a gap
-- of 0 between them and can hide a streak; confirm whether the data allows
-- that.
SELECT DISTINCT
    user_id,
    username
FROM UsersWithDateDiffNominal
WHERE date_diff_before = 1
  AND date_diff_after = 1
ORDER BY user_id
class TestSQLQueries(unittest.TestCase):
    '''
    Runs the queries stored in the sql/task*.sql files against the SQLite
    database and compares every result with the expected CSV stored in
    results/problem<N>.
    ---
    Paths are relative to the current working directory.
    '''

    # Number of the first problem solved in each task file.
    FIRST_PROBLEM_BY_TASK = {1: 1, 2: 5, 3: 9}
    # Number of problems (queries) read from one task file.
    PROBLEMS_PER_TASK = 4

    def setUp(self):
        # Establish a connection to your test database
        db_set = DatabaseSetup()
        self.conn, self.cur = db_set.databaseConnection()

    def tearDown(self):
        # Close the database connection
        self.cur.close()
        self.conn.close()

    @staticmethod
    def _contains_sql(chunk):
        '''
        Method tells whether a chunk holds anything besides blank lines
        and "--" comment lines
        ---
        Params:
            chunk (str): One blank-line separated part of a SQL file

        Returns:
            (bool): True if at least one line is neither empty nor a comment
        '''
        return any(
            line.strip() and not line.lstrip().startswith('--')
            for line in chunk.splitlines()
        )

    def get_queries(self, path, limit=PROBLEMS_PER_TASK):
        '''
        Method fetches queries to check
        ---
        The file is split on blank lines; every problem (its comments plus
        its query) is expected to form one such chunk. Empty chunks and
        chunks holding only comments are skipped, because they are not
        executable statements.

        Params:
            path (str): Path to a file with queries
            limit (int): Maximum number of queries to return

        Returns:
            sep_queries (list): List of queries
        '''
        with open(path, 'r') as file:
            sql_text = file.read()

        sep_queries = [
            chunk.strip()
            for chunk in sql_text.split('\n\n')
            if self._contains_sql(chunk)
        ]

        return sep_queries[:limit]

    def get_result_for_all_problems_in_task(self, queries_to_check, task_n):
        '''
        Method iterates through queries for all problems in a task and compares results with expected results
        ---
        Every query is run, printed next to its expected result and
        compared with DataFrame.equals, so values, dtypes, column names
        and row order all have to match.

        Params:
            queries_to_check (list): list of queries to check
            task_n (int): task number

        Returns:
            task_result (bool): Method returns True or False
                True - if all results matches expected results in the task;
                False - otherwise
        '''
        # Unknown task numbers start at problem 1.
        first_problem = self.FIRST_PROBLEM_BY_TASK.get(task_n, 1)
        task_result = True

        for problem_n, query in enumerate(queries_to_check, start=first_problem):
            print('\n' + '-' * 70)
            print(f'TASK {task_n}. PROBLEM {problem_n}')
            result = pd.read_sql_query(query, self.conn)
            expected_result = pd.read_csv(f'results/problem{problem_n}')
            is_equal = expected_result.equals(result)
            print(f'RESULT: \n {result}')
            print(f'\nEXPECTED RESULT: \n {expected_result}')
            print(f'\nActual and expected are equal: {is_equal}')
            # Keep going after a mismatch so every problem is reported.
            task_result = task_result and is_equal

        return task_result

    def test_task1(self):
        # Task 1: Example SQL query in task1.sql
        queries_task1 = self.get_queries(path='sql/task1.sql')
        task1_result = self.get_result_for_all_problems_in_task(queries_to_check=queries_task1, task_n=1)

        # Final check
        self.assertTrue(task1_result, "Task 1: Query output doesn't match expected result.")

    def test_task2(self):
        # Task 2: Example SQL query in task2.sql
        queries_task2 = self.get_queries(path='sql/task2.sql')
        task2_result = self.get_result_for_all_problems_in_task(queries_to_check=queries_task2, task_n=2)

        # Final check
        self.assertTrue(task2_result, "Task 2: Query output doesn't match expected result.")

    def test_task3(self):
        # Task 3: Example SQL query in task3.sql
        queries_task3 = self.get_queries(path='sql/task3.sql')
        task3_result = self.get_result_for_all_problems_in_task(queries_to_check=queries_task3, task_n=3)

        # Final check
        self.assertTrue(task3_result, "Task 3: Query output doesn't match expected result.")