diff --git a/sql/task_1.sql b/sql/task_1.sql
new file mode 100644
index 000000000..cca16436e
--- /dev/null
+++ b/sql/task_1.sql
@@ -0,0 +1,67 @@
+-- Problem 1: Retrieve all products in the Sports category
+-- Products are joined to their category on the shared category_id column.
+-- The Sports category is matched by its category_name value, 'Sports & Outdoors'.
+
+SELECT
+    pd.product_id,
+    pd.product_name,
+    pd.description,
+    pd.price,
+    cd.category_name
+FROM product_data AS pd
+INNER JOIN category_data AS cd
+    ON pd.category_id = cd.category_id
+WHERE cd.category_name = 'Sports & Outdoors';
+
+-- Problem 2: Retrieve the total number of orders for each user
+-- Orders are counted per user, so only the user columns form the grouping key;
+-- grouping by order_id as well would return one row per order with a count of 1.
+-- LEFT JOIN keeps users without orders: COUNT(od.order_id) skips NULLs and gives 0.
+
+SELECT
+    ud.user_id,
+    ud.username,
+    COUNT(od.order_id) AS total_orders
+FROM user_data AS ud
+LEFT JOIN order_data AS od
+    ON ud.user_id = od.user_id
+GROUP BY
+    ud.user_id,
+    ud.username;
+
+-- Problem 3: Retrieve the average rating for each product
+-- Ratings are averaged per product, so only the product columns form the grouping key;
+-- grouping by rating as well would return one row per distinct rating value.
+-- Products without reviews have nothing to average and are left out by the inner join.
+
+SELECT
+    pd.product_id,
+    pd.product_name,
+    AVG(rd.rating) AS average_rating
+FROM product_data AS pd
+INNER JOIN review_data AS rd
+    ON pd.product_id = rd.product_id
+GROUP BY
+    pd.product_id,
+    pd.product_name;
+
+-- Problem 4: Retrieve the top 5 users with the highest total amount spent on orders
+-- Each user's order amounts are summed, then users are ranked by that sum.
+-- The result column keeps the name total_amount; the ORDER BY repeats the aggregate
+-- so it cannot be confused with the per-order od.total_amount column.
+-- user_id breaks ties so the five rows returned are deterministic.
+
+SELECT
+    ud.user_id,
+    ud.username,
+    SUM(od.total_amount) AS total_amount
+FROM user_data AS ud
+INNER JOIN order_data AS od
+    ON ud.user_id = od.user_id
+GROUP BY
+    ud.user_id,
+    ud.username
+ORDER BY
+    SUM(od.total_amount) DESC,
+    ud.user_id
+LIMIT 5;
diff --git a/sql/task_2.sql b/sql/task_2.sql
new file mode 100644
index 000000000..f28e0e65d
--- /dev/null
+++ b/sql/task_2.sql
@@ -0,0 +1,71 @@
+-- Problem 5: Retrieve the products with the highest average rating
+-- Ratings are averaged per product and the five best averages are returned.
+-- product_id breaks ties so the five rows returned are deterministic.
+
+SELECT
+    pd.product_id,
+    pd.product_name,
+    AVG(rd.rating) AS average_rating
+FROM product_data AS pd
+INNER JOIN review_data AS rd
+    ON pd.product_id = rd.product_id
+GROUP BY
+    pd.product_id,
+    pd.product_name
+ORDER BY
+    average_rating DESC,
+    pd.product_id
+LIMIT 5;
+
+-- Problem 6: Retrieve the users who have made at least one order in each category
+-- Orders are followed through their items to the product and its category.
+-- A user qualifies when the number of distinct categories they ordered from
+-- equals the number of rows in category_data.
+
+SELECT
+    ud.user_id,
+    ud.username
+FROM user_data AS ud
+INNER JOIN order_data AS od
+    ON ud.user_id = od.user_id
+INNER JOIN order_items_data AS oid
+    ON od.order_id = oid.order_id
+INNER JOIN product_data AS pd
+    ON oid.product_id = pd.product_id
+GROUP BY
+    ud.user_id,
+    ud.username
+HAVING COUNT(DISTINCT pd.category_id) = (SELECT COUNT(*) FROM category_data);
+
+-- Problem 7: Retrieve the products that have not received any reviews
+-- A product without reviews has no row in review_data at all, so the check is
+-- for the absence of a matching row rather than for NULL review columns.
+
+SELECT
+    pd.product_id,
+    pd.product_name
+FROM product_data AS pd
+WHERE NOT EXISTS (
+    SELECT 1
+    FROM review_data AS rd
+    WHERE rd.product_id = pd.product_id
+);
+
+-- Problem 8: Retrieve the users who have made consecutive orders on consecutive days
+-- order_data is joined to itself to pair each order with an order the same user
+-- placed on the following day; one such pair is enough for the user to qualify.
+-- GROUP BY collapses multiple pairs so each user is listed once.
+-- NOTE(review): the equality assumes order_date holds a date without a time part - confirm.
+
+SELECT
+    ud.user_id,
+    ud.username
+FROM user_data AS ud
+INNER JOIN order_data AS od
+    ON ud.user_id = od.user_id
+INNER JOIN order_data AS odtwo
+    ON ud.user_id = odtwo.user_id
+    AND odtwo.order_date = DATE_ADD(od.order_date, INTERVAL 1 DAY)
+GROUP BY
+    ud.user_id,
+    ud.username;
diff --git a/sql/task_3.sql b/sql/task_3.sql
new file mode 100644
index 000000000..56f21d9ab
--- /dev/null
+++ b/sql/task_3.sql
@@ -0,0 +1,93 @@
+-- Problem 9: Retrieve the top 3 categories with the highest total sales amount
+-- Each order item contributes quantity * unit_price to the category of its product.
+-- category_id breaks ties so the three rows returned are deterministic.
+
+SELECT
+    cd.category_id,
+    cd.category_name,
+    SUM(oid.quantity * oid.unit_price) AS total_sales_amount
+FROM category_data AS cd
+INNER JOIN product_data AS pd
+    ON cd.category_id = pd.category_id
+INNER JOIN order_items_data AS oid
+    ON pd.product_id = oid.product_id
+GROUP BY
+    cd.category_id,
+    cd.category_name
+ORDER BY
+    total_sales_amount DESC,
+    cd.category_id
+LIMIT 3;
+
+-- Problem 10: Retrieve the users who have placed orders for all products in the Toys & Games
+-- Orders are followed through their items to the product and its category,
+-- keeping only 'Toys & Games' products. A user qualifies when the number of
+-- distinct products they ordered equals the number of products in that category.
+
+SELECT
+    ud.user_id,
+    ud.username
+FROM user_data AS ud
+INNER JOIN order_data AS od
+    ON ud.user_id = od.user_id
+INNER JOIN order_items_data AS oid
+    ON od.order_id = oid.order_id
+INNER JOIN product_data AS pd
+    ON oid.product_id = pd.product_id
+INNER JOIN category_data AS cd
+    ON pd.category_id = cd.category_id
+WHERE cd.category_name = 'Toys & Games'
+GROUP BY
+    ud.user_id,
+    ud.username
+HAVING COUNT(DISTINCT pd.product_id) = (
+    SELECT COUNT(*)
+    FROM product_data AS tp
+    INNER JOIN category_data AS tc
+        ON tp.category_id = tc.category_id
+    WHERE tc.category_name = 'Toys & Games'
+);
+
+-- Problem 11: Retrieve the products that have the highest price within each category
+-- The CTE ranks products by price, highest first, separately for each category.
+-- RANK gives tied products the same rank, so every product sharing the top price
+-- of its category is returned.
+
+WITH ranked_products AS (
+    SELECT
+        product_id,
+        product_name,
+        category_id,
+        price,
+        RANK() OVER (PARTITION BY category_id ORDER BY price DESC) AS price_rank
+    FROM product_data
+)
+SELECT
+    product_id,
+    product_name,
+    category_id,
+    price
+FROM ranked_products
+WHERE price_rank = 1;
+
+-- Problem 12: Retrieve the users who have placed orders on consecutive days for at least 3 days
+-- order_data is joined to itself twice: odtwo is an order placed the day after od,
+-- and odthree is an order placed the day after odtwo, giving three days in a row.
+-- One such chain is enough to qualify; GROUP BY lists each user once.
+-- NOTE(review): the equality assumes order_date holds a date without a time part - confirm.
+
+SELECT
+    ud.user_id,
+    ud.username
+FROM user_data AS ud
+INNER JOIN order_data AS od
+    ON ud.user_id = od.user_id
+INNER JOIN order_data AS odtwo
+    ON ud.user_id = odtwo.user_id
+    AND odtwo.order_date = DATE_ADD(od.order_date, INTERVAL 1 DAY)
+INNER JOIN order_data AS odthree
+    ON ud.user_id = odthree.user_id
+    AND odthree.order_date = DATE_ADD(odtwo.order_date, INTERVAL 1 DAY)
+GROUP BY
+    ud.user_id,
+    ud.username;