Skip to content
40 changes: 40 additions & 0 deletions sql/task_1.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
-- Problem 1: Retrieve all products in the Sports category
-- Each product row is paired with its category row through the category_id column the two tables share.
-- Only rows whose category name is exactly 'Sports & Outdoors' are kept.
-- The result shows the product's id, name, description and price next to the category name.

SELECT
    p.product_id,
    p.product_name,
    p.description,
    p.price,
    c.category_name
FROM product_data AS p
INNER JOIN category_data AS c
    ON c.category_id = p.category_id
WHERE c.category_name = 'Sports & Outdoors';

-- Problem 2: Retrieve the total number of orders for each user
-- Users are joined to their orders on the user_id column the two tables share.
-- COUNT(od.order_id) counts the orders in each group and is exposed as total_orders.
-- The grouping is by user only: adding od.order_id to GROUP BY would create one group per order, so every count would be 1.
-- Because this is an inner join, users without any order do not appear in the result.

SELECT
    ud.user_id,
    ud.username,
    COUNT(od.order_id) AS total_orders
FROM user_data ud
JOIN order_data od ON ud.user_id = od.user_id
GROUP BY ud.user_id, ud.username;

-- Problem 3: Retrieve the average rating for each product
-- Products are joined to their reviews on the product_id column the two tables share.
-- AVG(rd.rating) averages the ratings in each group and is exposed as average_rating.
-- The grouping is by product only: adding rd.rating to GROUP BY would create one group per distinct rating value,
-- so each "average" would just be that rating and a product would appear once per rating it received.
-- Because this is an inner join, products without any review do not appear in the result.

SELECT
    pd.product_id,
    pd.product_name,
    AVG(rd.rating) AS average_rating
FROM product_data pd
JOIN review_data rd ON pd.product_id = rd.product_id
GROUP BY pd.product_id, pd.product_name;

-- Problem 4: Retrieve the top 5 users with the highest total amount spent on orders
-- Users are joined to their orders on the user_id column that is identical in both user data table and order data table.
-- SUM(od.total_amount) adds up the amounts of all of a user's orders; the result column keeps the name total_amount.
-- The grouping is by user only, so each user appears once with the combined total rather than once per order amount.
-- Rows are ordered by the summed amount in descending order; user_id breaks ties so the 5 rows returned are repeatable.
-- ORDER BY repeats the SUM expression instead of the alias to avoid any confusion with the od.total_amount column.
SELECT
    ud.user_id,
    ud.username,
    SUM(od.total_amount) AS total_amount
FROM user_data ud
JOIN order_data od ON ud.user_id = od.user_id
GROUP BY ud.user_id, ud.username
ORDER BY SUM(od.total_amount) DESC, ud.user_id
LIMIT 5;
47 changes: 47 additions & 0 deletions sql/task_2.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,47 @@
-- Problem 5: Retrieve the products with the highest average rating
-- Every product is matched to its reviews through the product_id column the two tables share.
-- The ratings are averaged per product and exposed as average_rating.
-- Products are sorted from the highest average to the lowest and only the first 5 rows are returned.

SELECT
    p.product_id,
    p.product_name,
    AVG(r.rating) AS average_rating
FROM product_data AS p
INNER JOIN review_data AS r
    ON r.product_id = p.product_id
GROUP BY
    p.product_id,
    p.product_name
ORDER BY average_rating DESC
LIMIT 5;

-- Problem 6: Retrieve the users who have made at least one order in each category
-- Users are joined to their orders on user_id, orders to their items on order_id, and items to products on product_id.
-- The product row supplies the category_id of every item a user has ordered.
-- HAVING keeps a user only when the number of distinct categories they ordered from equals the number of rows in category_data.
-- Without that comparison the query would return every user who has ordered anything at all.
-- NOTE(review): assumes category_data holds one row per category and that every product's category_id refers to one of them - confirm against the schema.

SELECT
    ud.user_id,
    ud.username
FROM user_data ud
JOIN order_data od ON ud.user_id = od.user_id
JOIN order_items_data oid ON od.order_id = oid.order_id
JOIN product_data pd ON oid.product_id = pd.product_id
GROUP BY ud.user_id, ud.username
HAVING COUNT(DISTINCT pd.category_id) = (SELECT COUNT(*) FROM category_data);

-- Problem 7: Retrieve the products that have not received any reviews
-- A product has no reviews when review_data contains no row with its product_id.
-- NOT EXISTS keeps exactly those products and returns each of them once.
-- An inner join cannot answer this question: it only produces rows for products that DO have a review row.
-- NOTE(review): a review row whose review_text and rating are both NULL still counts as a review here - confirm that is intended.

SELECT
    pd.product_id,
    pd.product_name
FROM product_data pd
WHERE NOT EXISTS (
    SELECT 1
    FROM review_data rd
    WHERE rd.product_id = pd.product_id
);

-- Problem 8: Retrieve the users who have made consecutive orders on consecutive days
-- The subquery pairs each order (od) with an order of the same user (odtwo) placed exactly one day later.
-- EXISTS keeps a user as soon as one such pair is found, so each qualifying user is returned once.
-- No count filter is applied: a single pair of orders on two consecutive days is enough to qualify,
-- whereas requiring more than one distinct first-day order would wrongly drop such users.
-- NOTE(review): the equality assumes order_date holds a calendar date with no time part - confirm against the schema.

SELECT
    ud.user_id,
    ud.username
FROM user_data ud
WHERE EXISTS (
    SELECT 1
    FROM order_data od
    JOIN order_data odtwo
        ON odtwo.user_id = od.user_id
       AND odtwo.order_date = DATE_ADD(od.order_date, INTERVAL 1 DAY)
    WHERE od.user_id = ud.user_id
);
65 changes: 65 additions & 0 deletions sql/task_3.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,65 @@
-- Problem 9: Retrieve the top 3 categories with the highest total sales amount
-- Every order item is matched to its product through product_id, and every product to its category through category_id.
-- The sales amount of an item is its quantity multiplied by its unit_price, both read from the order items data table.
-- These amounts are summed per category and exposed as total_sales_amount.
-- Categories are sorted from the highest total to the lowest and only the first 3 rows are returned.

SELECT
    c.category_id,
    c.category_name,
    SUM(i.quantity * i.unit_price) AS total_sales_amount
FROM order_items_data AS i
INNER JOIN product_data AS p
    ON p.product_id = i.product_id
INNER JOIN category_data AS c
    ON c.category_id = p.category_id
GROUP BY
    c.category_id,
    c.category_name
ORDER BY total_sales_amount DESC
LIMIT 3;

-- Problem 10: Retrieve the users who have placed orders for all products in the Toys & Games
-- Users are joined to their orders on user_id, orders to their items on order_id,
-- items to products on product_id, and products to categories on category_id.
-- The WHERE clause keeps only ordered items whose product belongs to the 'Toys & Games' category.
-- HAVING then keeps a user only when the number of distinct 'Toys & Games' products they ordered
-- equals the number of products that exist in that category (counted by the subquery).
-- Without that comparison the query would return users who ordered ANY product of the category, not ALL of them.

SELECT
    ud.user_id,
    ud.username
FROM user_data ud
JOIN order_data od ON ud.user_id = od.user_id
JOIN order_items_data oid ON od.order_id = oid.order_id
JOIN product_data pd ON oid.product_id = pd.product_id
JOIN category_data cd ON pd.category_id = cd.category_id
WHERE cd.category_name = 'Toys & Games'
GROUP BY ud.user_id, ud.username
HAVING COUNT(DISTINCT pd.product_id) = (
    SELECT COUNT(DISTINCT tp.product_id)
    FROM product_data tp
    JOIN category_data tc ON tp.category_id = tc.category_id
    WHERE tc.category_name = 'Toys & Games'
);

-- Problem 11: Retrieve the products that have the highest price within each category
-- The CTE ranks the products of each category by price, from the most expensive to the cheapest.
-- RANK() is used rather than ROW_NUMBER() so that products tied for the highest price all receive rank 1;
-- ROW_NUMBER() would keep only one of the tied products, chosen arbitrarily.
-- The outer query keeps the rank-1 rows, i.e. every highest-priced product of every category.

WITH ranked_products AS (
    SELECT
        product_id,
        product_name,
        category_id,
        price,
        RANK() OVER (PARTITION BY category_id ORDER BY price DESC) AS price_rank
    FROM product_data
)
SELECT
    product_id,
    product_name,
    category_id,
    price
FROM ranked_products
WHERE price_rank = 1;


-- Problem 12: Retrieve the users who have placed orders on consecutive days for at least 3 days
-- The subquery looks for three orders of the same user: one on some day (od), one on the following day (odtwo)
-- and one on the day after that (odthree).
-- Both later days are measured from the first order: +1 day and +2 days.
-- Adding 2 days to the second day instead would match days d, d+1 and d+3, which are not consecutive.
-- EXISTS keeps a user as soon as one such three-day run is found, so each qualifying user is returned once;
-- no count filter is applied because a single run of three days already satisfies the requirement.
-- NOTE(review): the equalities assume order_date holds a calendar date with no time part - confirm against the schema.

SELECT
    ud.user_id,
    ud.username
FROM user_data ud
WHERE EXISTS (
    SELECT 1
    FROM order_data od
    JOIN order_data odtwo
        ON odtwo.user_id = od.user_id
       AND odtwo.order_date = DATE_ADD(od.order_date, INTERVAL 1 DAY)
    JOIN order_data odthree
        ON odthree.user_id = od.user_id
       AND odthree.order_date = DATE_ADD(od.order_date, INTERVAL 2 DAY)
    WHERE od.user_id = ud.user_id
);