diff --git a/sql/task1.sql b/sql/task1.sql index 90de336ca..0d117052d 100644 --- a/sql/task1.sql +++ b/sql/task1.sql @@ -1,14 +1,63 @@ -- Problem 1: Retrieve all products in the Sports category -- Write an SQL query to retrieve all products in a specific category. +-- CTE 'SportsCategory' retrieves category_ids for category names that contain 'sports' word +-- (case-insensitive incase sports is the second word) +-- If 'Sportswear' was a category, this query would retrieve it's data as well + +WITH SportsCategory AS ( + SELECT category_id + FROM Categories + WHERE LOWER(category_name) LIKE '%sports%' +) + +SELECT * +FROM Products +WHERE category_id IN (SELECT category_id FROM SportsCategory); + + -- Problem 2: Retrieve the total number of orders for each user -- Write an SQL query to retrieve the total number of orders for each user. -- The result should include the user ID, username, and the total number of orders. +-- Used a 'LEFT JOIN' to include all users (even those that may have place no orders) +-- In PostgresSQL, COUNT() function doesn't return NULL if there are no rows to count. +-- Instead, it returns zero. + +SELECT u.user_id, u.username, count(distinct o.order_id) as Total_Number_Orders +FROM Users u +LEFT JOIN Orders o ON o.user_id = u.user_id +GROUP BY u.user_id, u.username; + + -- Problem 3: Retrieve the average rating for each product -- Write an SQL query to retrieve the average rating for each product. -- The result should include the product ID, product name, and the average rating. + +-- Used a 'LEFT JOIN' to include all products (even those without any reviews) +-- Grouping by p.product_id, p.product_nam, we consider all ratings for a specific product to calculate "average_rating" +SELECT p.product_id, p.product_name, AVG(r.rating) AS average_rating +FROM Products p +LEFT JOIN Reviews r ON r.product_id = p.product_id +GROUP BY p.product_id, p.product_name; + + -- Problem 4: Retrieve the top 5 users with the highest total amount spent on orders -- Write an SQL query to retrieve the top 5 users with the highest total amount spent on orders. -- The result should include the user ID, username, and the total amount spent. + +-- For this query, I considered using the 'Payments' table but after observing that payment amount and order amount for user_id = 2 are different +-- I'm assuming that Payment amount can include shipping costs, customs, etc. +-- For this query, I am only considering the amount spent on the actual 'Order' (from Orders table) + +-- Here, I have used 'LEFT JOIN' to include all users (even those that may have place no orders) +-- In PostgresSQL, The SUM() of an empty set will return NULL, not zero. +-- Hence, I have used the COALESCE() function to return zero instead of NULL in case there is no matching row. +-- The 'LIMIT 5' clause restricts the result to the top 5 users. +SELECT u.user_id, u.username, COALESCE(SUM(o.total_amount), 0) AS total_amount_spent +FROM Users u +LEFT JOIN Orders o ON o.user_id = u.user_id +GROUP BY u.user_id, u.username +ORDER BY total_amount_spent DESC +LIMIT 5; \ No newline at end of file diff --git a/sql/task2.sql b/sql/task2.sql index ad2596731..16307713d 100644 --- a/sql/task2.sql +++ b/sql/task2.sql @@ -3,17 +3,59 @@ -- The result should include the product ID, product name, and the average rating. -- Hint: You may need to use subqueries or common table expressions (CTEs) to solve this problem. + +SELECT p.product_id, p.product_name, AVG(r.rating) AS average_rating +FROM Products p +LEFT JOIN Reviews r on r.product_id = p.product_id +GROUP BY p.product_id, p.product_name +ORDER BY average_rating DESC +LIMIT 1; + + -- Problem 6: Retrieve the users who have made at least one order in each category -- Write an SQL query to retrieve the users who have made at least one order in each category. -- The result should include the user ID and username. -- Hint: You may need to use subqueries or joins to solve this problem. +-- Counts the distinct categories for each user and compares it to the total count of categories in 'Categories' +-- If equal, then user has made atleast one order in each category +SELECT u.user_id, u.username +FROM Users u +JOIN Orders o ON o.user_id = u.user_id +JOIN Order_Items oi on oi.order_id = o.order_id +JOIN Products p on p.product_id = oi.product_id +GROUP BY u.user_id, u.username +HAVING COUNT(DISTINCT p.category_id) = (SELECT COUNT(*) FROM Categories); + + + + -- Problem 7: Retrieve the products that have not received any reviews -- Write an SQL query to retrieve the products that have not received any reviews. -- The result should include the product ID and product name. -- Hint: You may need to use subqueries or left joins to solve this problem. + +SELECT p.product_id, p.product_name +FROM Products p +LEFT JOIN Reviews r on r.product_id = p.product_id +WHERE r.review_id IS NULL; + + + + -- Problem 8: Retrieve the users who have made consecutive orders on consecutive days -- Write an SQL query to retrieve the users who have made consecutive orders on consecutive days. -- The result should include the user ID and username. --- Hint: You may need to use subqueries or window functions to solve this problem. \ No newline at end of file +-- Hint: You may need to use subqueries or window functions to solve this problem. + +-- Calculates new column 'prev_order_date' for each order +WITH PrevOrders AS ( + SELECT user_id, order_id, order_date, LAG(order_date) OVER (PARTITION BY user_id ORDER BY order_date) AS prev_order_date + FROM Orders +) +-- Select only those users who have made consecutive orders on consecutive days +SELECT DISTINCT u.user_id, u.username +FROM Users u +JOIN PrevOrders o ON u.user_id = o.user_id +WHERE o.order_date = o.prev_order_date + INTERVAL 1 DAY; diff --git a/sql/task3.sql b/sql/task3.sql index f078a9439..8ad95f38a 100644 --- a/sql/task3.sql +++ b/sql/task3.sql @@ -3,17 +3,81 @@ -- The result should include the category ID, category name, and the total sales amount. -- Hint: You may need to use subqueries, joins, and aggregate functions to solve this problem. +-- Calculates total sales amount for each category and then selects the top 3 categories +SELECT C.category_id, C.category_name, SUM(O.total_amount) AS total_sales_amount +FROM Categories C +JOIN Products P ON C.category_id = P.category_id +JOIN Order_Items OI ON P.product_id = OI.product_id +JOIN Orders O ON OI.order_id = O.order_id +GROUP BY C.category_id, C.category_name +ORDER BY total_sales_amount DESC +LIMIT 3; + + -- Problem 10: Retrieve the users who have placed orders for all products in the Toys & Games -- Write an SQL query to retrieve the users who have placed orders for all products in the Toys & Games -- The result should include the user ID and username. -- Hint: You may need to use subqueries, joins, and aggregate functions to solve this problem. +WITH ToysAndGamesProducts AS ( + SELECT DISTINCT p.product_id + FROM Products p + JOIN Categories c ON p.category_id = c.category_id + WHERE c.category_name = 'Toys & Games' +) +SELECT u.user_id, u.username +FROM Users u +JOIN Orders o on o.user_id = u.user_id +JOIN Order_Items oi on oi.order_id = o.order_id +JOIN Products p on p.product_id = oi.product_id +JOIN Categories c on c.category_id = p.category_id +JOIN ToysAndGamesProducts tg ON p.product_id = tg.product_id +GROUP BY u.user_id, u.username +HAVING COUNT(DISTINCT p.product_id) = (SELECT COUNT(*) FROM ToysAndGamesProducts); + + + + + -- Problem 11: Retrieve the products that have the highest price within each category -- Write an SQL query to retrieve the products that have the highest price within each category. -- The result should include the product ID, product name, category ID, and price. -- Hint: You may need to use subqueries, joins, and window functions to solve this problem. + + +-- This gives the max_price within each category +WITH MaxPrice AS ( + SELECT category_id, MAX(price) as max_price + FROM Products + GROUP BY category_id +) + +SELECT p.product_id, p.product_name, mp.category_id, p.price +FROM Products p +JOIN MaxPrice mp on p.category_id = mp.category_id AND p.price = mp.max_price; + + + -- Problem 12: Retrieve the users who have placed orders on consecutive days for at least 3 days -- Write an SQL query to retrieve the users who have placed orders on consecutive days for at least 3 days. -- The result should include the user ID and username. -- Hint: You may need to use subqueries, joins, and window functions to solve this problem. + +-- Calculates two new column 'order_date2' and 'order_date3' for each order +WITH ThreeDaysOrders AS ( + SELECT + o.user_id, o.order_date, + LAG(o.order_date, 1) OVER (PARTITION BY o.user_id ORDER BY o.order_date) AS order_date2, + LAG(o.order_date, 2) OVER (PARTITION BY o.user_id ORDER BY o.order_date) AS order_date3 + FROM Orders o +) +-- Only retrieve users for whom the current order date is one day after the second-order date and two days after the third-order date +SELECT u.user_id, u.username +FROM Users u +JOIN ThreeDaysOrders o ON u.user_id = o.user_id +WHERE o.order_date2 = DATE_SUB(o.order_date, INTERVAL 1 DAY) + AND o.order_date3 = DATE_SUB(o.order_date, INTERVAL 2 DAY); + + +