diff --git a/02_activities/assignments/Assignment2.md b/02_activities/assignments/Assignment2.md index 5cbb4e70f..834b53e62 100644 --- a/02_activities/assignments/Assignment2.md +++ b/02_activities/assignments/Assignment2.md @@ -55,6 +55,27 @@ The store wants to keep customer addresses. Propose two architectures for the CU ``` Your answer... + +Option 1 – Overwrite + +One record per customer in CUSTOMER_ADDRESS. + +When a customer changes their address, the row is updated with the new information. + +No history of previous addresses is kept. + +This is a Type 1 Slowly Changing Dimension. + +Option 2 – Retain Changes (History) + +Multiple records per customer in CUSTOMER_ADDRESS. + +Each address has start_date and end_date columns. + +When a customer moves, a new record is inserted with the new address, and the old record is marked inactive by setting end_date. + +This is a Type 2 Slowly Changing Dimension. + ``` *** diff --git a/02_activities/assignments/Assignment2_section1.jpg b/02_activities/assignments/Assignment2_section1.jpg new file mode 100644 index 000000000..9e8f5dd93 Binary files /dev/null and b/02_activities/assignments/Assignment2_section1.jpg differ diff --git a/02_activities/assignments/assignment2.sql b/02_activities/assignments/assignment2.sql index 5ad40748a..78eed7b7a 100644 --- a/02_activities/assignments/assignment2.sql +++ b/02_activities/assignments/assignment2.sql @@ -19,6 +19,12 @@ HINT: keep the syntax the same, but edited the correct components with the strin The `||` values concatenate the columns into strings. Edit the appropriate columns -- you're making two edits -- and the NULL rows will be fixed. All the other rows will remain the same.) 
*/
+-- Build a "name, size (qty type)" label; without COALESCE a NULL operand makes the whole || result NULL.
+-- NOTE(review): assumes a missing size prints blank and a missing qty type prints 'unit' -- confirm with the prompt.
+SELECT
+    product_name || ', ' || COALESCE(product_size, '') || ' (' || COALESCE(product_qty_type, 'unit') || ')'
+        AS product_label
+FROM product;
@@ -32,17 +38,41 @@ each new market date for each customer, or select only the unique market dates p
(without purchase details) and number those visits.
HINT: One of these approaches uses ROW_NUMBER() and one uses DENSE_RANK(). */
+-- Collapse purchases to distinct (customer, date) pairs so same-day purchases count as one visit.
+SELECT
+    customer_id,
+    market_date,
+    ROW_NUMBER() OVER (PARTITION BY customer_id ORDER BY market_date) AS visit_number
+FROM (SELECT DISTINCT customer_id, market_date FROM customer_purchases) AS visits;
/* 2. Reverse the numbering of the query from a part so each customer’s most recent visit is labeled 1, then write another query that uses this one as a subquery (or temp table) and filters the results to only the customer’s most recent visit. */
+-- Same numbering reversed (most recent visit = 1), filtered down to that visit.
+SELECT customer_id, market_date, visit_number
+FROM (
+    SELECT
+        customer_id,
+        market_date,
+        ROW_NUMBER() OVER (PARTITION BY customer_id ORDER BY market_date DESC) AS visit_number
+    FROM (SELECT DISTINCT customer_id, market_date FROM customer_purchases) AS visits
+) AS ranked_visits
+WHERE visit_number = 1;
/* 3. Using a COUNT() window function, include a value along with each row of the customer_purchases table that indicates how many different times that customer has purchased that product_id. */
+SELECT
+    customer_id,
+    product_id,
+    market_date,
+    quantity,
+    cost_to_customer_per_qty,
+    COUNT(*) OVER (PARTITION BY customer_id, product_id) AS times_purchased
+FROM customer_purchases;
-- String manipulations
@@ -57,10 +87,24 @@ Remove any trailing or leading whitespaces. Don't just use a case statement for 
Hint: you might need to use INSTR(product_name,'-') to find the hyphens. INSTR will help split the column. */
+-- description = trimmed text after the first hyphen, or NULL when the name has no hyphen.
+-- NOTE(review): hyphen-less products used to be filtered out by a WHERE clause; they are kept
+-- here on the assumption that the task wants every product listed -- confirm with the prompt.
+SELECT
+    product_name,
+    CASE
+        WHEN INSTR(product_name, '-') > 0
+            THEN TRIM(SUBSTR(product_name, INSTR(product_name, '-') + 1))
+        ELSE NULL
+    END AS description
+FROM product;
/* 2. Filter the query to show any product_size value that contain a number with REGEXP. 
*/
+SELECT *
+FROM product
+WHERE product_size REGEXP '[0-9]';
-- UNION
@@ -73,6 +117,36 @@ HINT: There are a possibly a few ways to do this query, but if you're struggling
3) Query the second temp table twice, once for the best day, once for the worst day, with a UNION binding them. */
+-- Step 1: total sales per market date.
+WITH sales_per_day AS (
+    SELECT
+        market_date,
+        SUM(quantity * cost_to_customer_per_qty) AS total_sales
+    FROM customer_purchases
+    GROUP BY market_date
+),
+
+-- Step 2: rank the dates from both ends; RANK() keeps ties, so several dates can share rank 1.
+ranked_sales AS (
+    SELECT
+        market_date,
+        total_sales,
+        RANK() OVER (ORDER BY total_sales DESC) AS rank_high, -- 1 = highest sales
+        RANK() OVER (ORDER BY total_sales ASC) AS rank_low    -- 1 = lowest sales
+    FROM sales_per_day
+)
+
+-- Step 3: best and worst day(s). The label makes the two branches disjoint, so UNION ALL
+-- returns the same rows as UNION without the de-duplication pass.
+SELECT 'Best Day' AS label, market_date, total_sales
+FROM ranked_sales
+WHERE rank_high = 1
+
+UNION ALL
+
+SELECT 'Worst Day' AS label, market_date, total_sales
+FROM ranked_sales
+WHERE rank_low = 1;
@@ -89,6 +163,26 @@ Think a bit about the row counts: how many distinct vendors, product names are t
How many customers are there (y). Before your final group by you should have the product of those two queries (x*y). */
+-- 5 units of every vendor/product pair sold to every customer on record.
+-- vendor_inventory can hold a pair on many market dates, so it is reduced to distinct pairs
+-- first; joining it directly multiplies the revenue by the number of market dates.
+-- NOTE(review): a pair whose original_price changed still yields one row per price.
+SELECT
+    v.vendor_name,
+    p.product_name,
+    SUM(5 * vp.original_price) AS potential_revenue
+FROM (
+    SELECT DISTINCT vendor_id, product_id, original_price
+    FROM vendor_inventory
+) AS vp
+INNER JOIN vendor AS v
+    ON v.vendor_id = vp.vendor_id
+INNER JOIN product AS p
+    ON p.product_id = vp.product_id
+-- One copy of each pair per customer.
+CROSS JOIN (SELECT DISTINCT customer_id FROM customer) AS c
+GROUP BY v.vendor_name, p.product_name
+ORDER BY v.vendor_name, p.product_name;
-- INSERT
@@ -98,17 +192,50 @@ It should use all of the columns from the product table, as well as a new column
Name the timestamp column `snapshot_timestamp`. */
+DROP TABLE IF EXISTS product_units;
+
+-- Snapshot of the 'unit' products; SELECT * is deliberate because the task asks for
+-- all of the columns of product plus the timestamp.
+CREATE TABLE product_units AS
+SELECT
+    *,
+    CURRENT_TIMESTAMP AS snapshot_timestamp
+FROM product
+WHERE product_qty_type = 'unit';
+
+SELECT * FROM product_units;
+
+
+
/*2. 
Using `INSERT`, add a new row to the product_units table (with an updated timestamp). This can be any product you desire (e.g. add another record for Apple Pie). */
+-- Another 'Apple Pie' record with a fresh timestamp; 1000 / 100 are presumably placeholder ids.
+INSERT INTO product_units
+    (product_id, product_name, product_size, product_category_id, product_qty_type, snapshot_timestamp)
+VALUES (
+    1000,              -- product_id
+    'Apple Pie',       -- product_name
+    '1 unit',          -- product_size
+    100,               -- product_category_id
+    'unit',            -- product_qty_type
+    CURRENT_TIMESTAMP  -- snapshot_timestamp
+);
-- DELETE
/* 1. Delete the older record for the whatever product you added. HINT: If you don't specify a WHERE clause, you are going to have a bad time.*/
+-- Keep only the newest 'Apple Pie' row. rowid breaks ties because CURRENT_TIMESTAMP has
+-- one-second resolution: run as one script, both rows can carry the same timestamp and a
+-- plain "snapshot_timestamp < MAX(...)" test would delete nothing. (rowid is SQLite-specific.)
+DELETE FROM product_units
+WHERE product_name = 'Apple Pie'
+  AND rowid <> (SELECT rowid FROM product_units WHERE product_name = 'Apple Pie'
+                ORDER BY snapshot_timestamp DESC, rowid DESC LIMIT 1);
-- UPDATE
@@ -128,6 +255,32 @@ Finally, make sure you have a WHERE statement to update the right row,
you'll need to use product_units.product_id to refer to the correct row within the product_units table. When you have all of these components, you can run the update statement. */
-
+ALTER TABLE product_units
+ADD COLUMN current_quantity INT;
+
+-- Preview: last recorded quantity per product.
+SELECT
+    vi.product_id,
+    vi.quantity
+FROM vendor_inventory AS vi
+INNER JOIN (
+    SELECT product_id, MAX(market_date) AS last_date
+    FROM vendor_inventory
+    GROUP BY product_id
+) AS latest
+    ON vi.product_id = latest.product_id AND vi.market_date = latest.last_date;
+
+-- No outer WHERE on purpose: the subquery is correlated on product_units.product_id and
+-- products without inventory rows fall back to 0. ORDER BY/LIMIT pins one row per product.
+UPDATE product_units
+SET current_quantity = COALESCE((
+    SELECT vi.quantity
+    FROM vendor_inventory AS vi
+    WHERE vi.product_id = product_units.product_id
+    ORDER BY vi.market_date DESC, vi.vendor_id
+    LIMIT 1
+), 0);
+
+SELECT * FROM product_units;