diff --git a/02_activities/assignments/DC_Cohort/Assignment1.md b/02_activities/assignments/DC_Cohort/Assignment1.md index f78778f5b..01aae90b0 100644 --- a/02_activities/assignments/DC_Cohort/Assignment1.md +++ b/02_activities/assignments/DC_Cohort/Assignment1.md @@ -108,6 +108,8 @@ Please do not pick the exact same tables that I have already diagrammed. For exa *** + + ## Section 2: You can start this section following *session 2*. @@ -205,5 +207,5 @@ Consider, for example, concepts of fariness, inequality, social structures, marg ``` -Your thoughts... +One example I can think of that might be embedded within something as small as sinks and paper towel machines is their ability to detect skin tones. This is something I notice almost every day in the office, and it is why I am so passionate about equity-oriented design: many tools in society are designed around white skin, so when people who have darker skin tones try to use these devices, the devices end up not working for us. While this might seem like a small problem when it comes to sinks and paper towel machines, the problem cascades when we start to think about facial recognition devices, pulse oximeters, and other technologies and tools that have deep-rooted algorithmic biases embedded within their databases. ``` diff --git a/02_activities/assignments/DC_Cohort/assignment1.sql b/02_activities/assignments/DC_Cohort/assignment1.sql index c992e3205..b4ec53be1 100644 --- a/02_activities/assignments/DC_Cohort/assignment1.sql +++ b/02_activities/assignments/DC_Cohort/assignment1.sql @@ -4,17 +4,23 @@ --SELECT /* 1. Write a query that returns everything in the customer table. */ - +SELECT * +FROM customer; /* 2. Write a query that displays all of the columns and 10 rows from the cus- tomer table, sorted by customer_last_name, then customer_first_ name. */ - - +SELECT * +FROM customer +ORDER BY customer_last_name, customer_first_name +LIMIT 10; --WHERE /* 1. 
Write a query that returns all customer purchases of product IDs 4 and 9. */ - +SELECT * +FROM customer_purchases +WHERE product_id = 4 +OR product_id = 9; /*2. Write a query that returns all customer purchases and a new calculated column 'price' (quantity * cost_to_customer_per_qty), @@ -23,11 +29,18 @@ filtered by customer IDs between 8 and 10 (inclusive) using either: 2. one condition using BETWEEN */ -- option 1 +SELECT *, (quantity*cost_to_customer_per_qty) AS price +FROM customer_purchases +WHERE customer_id >= 8 -- two explicit range conditions; "customer_id = 8 AND 10" parses as (customer_id = 8) AND 10 and only matches customer 8 +AND customer_id <= 10; --- option 2 +-- option 2 +SELECT *, (quantity*cost_to_customer_per_qty) AS price +FROM customer_purchases +WHERE customer_id BETWEEN 8 AND 10; --CASE /* 1. Products can be sold by the individual unit or by bulk measures like lbs. or oz. @@ -35,18 +48,47 @@ Using the product table, write a query that outputs the product_id and product_n columns and add a column called prod_qty_type_condensed that displays the word “unit” if the product_qty_type is “unit,” and otherwise displays the word “bulk.” */ +SELECT product_id, product_name -- only the two columns the task asks for +,CASE WHEN product_qty_type = 'unit' +THEN 'unit' +ELSE 'bulk' +END AS prod_qty_type_condensed +FROM product; /* 2. We want to flag all of the different types of pepper products that are sold at the market. add a column to the previous query called pepper_flag that outputs a 1 if the product_name contains the word “pepper” (regardless of capitalization), and otherwise outputs 0. */ +SELECT product_id, product_name +,CASE WHEN product_qty_type = 'unit' +THEN 'unit' +ELSE 'bulk' +END AS prod_qty_type_condensed + +,CASE WHEN LOWER(product_name) LIKE '%pepper%' -- LOWER() makes the match explicitly case-insensitive + THEN 1 -- numeric flag, not the string '1' + ELSE 0 + END AS pepper_flag + + FROM product; --JOIN /* 1. Write a query that INNER JOINs the vendor table to the vendor_booth_assignments table on the vendor_id field they both have in common, and sorts the result by vendor_name, then market_date. 
*/ +SELECT DISTINCT vendor_name, +market_date, +vb.vendor_id, +v.vendor_id + +FROM vendor as v +INNER JOIN vendor_booth_assignments as vb + ON vb.vendor_id = v.vendor_id + +ORDER BY vendor_name, market_date; -- the task asks for vendor_name, then market_date + @@ -56,6 +98,9 @@ vendor_id field they both have in common, and sorts the result by vendor_name, t /* 1. Write a query that determines how many times each vendor has rented a booth at the farmer’s market by counting the vendor booth assignments per vendor_id. */ +SELECT vendor_id, COUNT(vendor_id) as num_of_rented_booths +FROM vendor_booth_assignments +GROUP BY vendor_id; /* 2. The Farmer’s Market Customer Appreciation Committee wants to give a bumper @@ -64,6 +109,19 @@ of customers for them to give stickers to, sorted by last name, then first name. HINT: This query requires you to join two tables, use an aggregate function, and use the HAVING keyword. */ +SELECT + customer_first_name, + customer_last_name, + SUM(quantity*cost_to_customer_per_qty) AS total_spent + +FROM customer_purchases AS cp +INNER JOIN customer AS c + ON cp.customer_id = c.customer_id + + GROUP BY c.customer_id, customer_first_name, customer_last_name -- group on the key so two customers who share a name are not merged + + HAVING SUM(quantity*cost_to_customer_per_qty) > 2000 +ORDER BY customer_last_name, customer_first_name; --Temp Table @@ -78,7 +136,17 @@ When inserting the new vendor, you need to appropriately align the columns to be VALUES(col1,col2,col3,col4,col5) */ +DROP TABLE IF EXISTS temp.new_vendor; + +CREATE TABLE temp.new_vendor AS +SELECT * +FROM vendor; + +INSERT INTO temp.new_vendor (vendor_id, vendor_name, vendor_type, vendor_owner_first_name, vendor_owner_last_name) -- schema-qualified like the DROP/CREATE/SELECT around it +VALUES (10, 'Thomass Superfood Store', 'Fresh Focused', 'Thomas', 'Rosenthal'); +SELECT * +FROM temp.new_vendor; -- Date /*1. Get the customer_id, month, and year (in separate columns) of every purchase in the customer_purchases table. 
diff --git a/02_activities/assignments/DC_Cohort/images/sq1diagram.drawio.png b/02_activities/assignments/DC_Cohort/images/sq1diagram.drawio.png new file mode 100644 index 000000000..3fc81e299 Binary files /dev/null and b/02_activities/assignments/DC_Cohort/images/sq1diagram.drawio.png differ diff --git a/04_this_cohort/live_code/DC/module_2/module_2.sqbpro b/04_this_cohort/live_code/DC/module_2/module_2.sqbpro index 73ace631b..c715d62b6 100644 --- a/04_this_cohort/live_code/DC/module_2/module_2.sqbpro +++ b/04_this_cohort/live_code/DC/module_2/module_2.sqbpro @@ -1,95 +1,148 @@ -
/* MODULE 2 */ +
/* MODULE 2 */ /* SELECT */ /* 1. Select everything in the customer table */ -SELECT +SELECT * +FROM customer; /* 2. Use sql as a calculator */ - - +SELECT 1+1 as something, 10*5 as somethingelse, pi() as pi; /* constant expressions only, so no FROM clause is needed */ /* 3. Add order by and limit clauses */ +SELECT * +FROM customer +ORDER BY customer_last_name +LIMIT 10; - -/* 4. Select multiple specific columns * / - +/* 4. Select multiple specific columns */ +SELECT customer_id, customer_first_name +FROM customer; -/* 5. Add a static value in a column *//* MODULE 2 */ +/* 5. Add a static value in a column */ +SELECT 2025 as this_year, 'October' as this_month, customer_id +FROM customer +/* MODULE 2 */ /* WHERE */ /* 1. Select only customer 1 from the customer table */ SELECT * FROM customer -WHERE +WHERE customer_id = 1; /* 2. Differentiate between AND and OR */ +SELECT * +FROM customer +WHERE customer_id = 1 +OR customer_id = 2; /* 3. IN */ - - +SELECT * +FROM customer +WHERE customer_id IN(3,4,5) +OR customer_postal_code IN('M4M', 'M1L'); -- customers in these postal codes /* 4. LIKE */ +-- all the peppers +SELECT * FROM product +WHERE product_name LIKE '%pepper%'; +-- customer with a last name starting with a +SELECT * FROM customer +WHERE customer_last_name LIKE 'a%'; +/* 5. Nulls and Blanks*/ -/* 5. Nulls and Blanks* / +SELECT * FROM product +WHERE product_size IS NULL --null +OR product_size = ''; --blank +/* 6. BETWEEN x AND y */ +SELECT * +FROM customer +WHERE customer_id BETWEEN 1 AND 20; -/* 6. BETWEEN x AND y *//* MODULE 2 */ +--dates +SELECT market_date, market_day, market_year +FROM market_date_info + +WHERE market_date BETWEEN '2022-10-01' AND '2022-10-31'/* MODULE 2 */ /* CASE */ SELECT * /* 1. Add a CASE statement declaring which days vendors should come */ - +,CASE WHEN vendor_type = 'Fresh Focused' THEN 'Wednesday' + WHEN vendor_type = 'Prepared Foods' THEN 'Thursday' + ELSE 'Saturday' +END as day_of_specialty /* 2. 
Add another CASE statement for Pie Day */ - +,CASE WHEN vendor_name = "Annie's Pies" -- double quotes will work here! + THEN 'Annie is great' + END as pie_day /* no ELSE branch: vendors that do not match get NULL */ /* 3. Add another CASE statement with an ELSE clause to handle rows evaluating to False */ +,CASE WHEN vendor_name LIKE '%pie%' + THEN 'Wednesday' + ELSE 'Friday' + END as also_pie_day - + FROM vendor; /* 4. Experiment with selecting a different column instead of just a string value */ +SELECT * +,CASE WHEN cost_to_customer_per_qty < '1.00' -- NOTE(review): '1.00' is a text literal; presumably SQLite coerces it for the numeric comparison — confirm +THEN cost_to_customer_per_qty *5 +ELSE cost_to_customer_per_qty +END as inflation - -FROM vendor/* MODULE 2 */ +FROM customer_purchases/* MODULE 2 */ /* DISTINCT */ /* 1. Compare how many customer_ids are the customer_purchases table, one select with distinct, one without */ -- 4221 rows -SELECT customer_id FROM customer_purchases +SELECT customer_id FROM customer_purchases ; + +SELECT DISTINCT customer_id FROM customer_purchases; /* 2. Compare the difference between selecting market_day in market_date_info, with and without distinct: what do these difference mean?*/ +-- market is open for 150 days + SELECT market_day +FROM market_date_info; +SELECT DISTINCT market_day +FROM market_date_info; /* 3. Which vendor has sold products to a customer */ +SELECT DISTINCT vendor_id +FROM customer_purchases; - -/* 4. Which vendor has sold products to a customer ... and which product was it * / - +/* 4. Which vendor has sold products to a customer ... and which product was it */ +SELECT DISTINCT vendor_id, product_id +FROM customer_purchases; /* 5. Which vendor has sold products to a customer ... and which product was it? ... AND to whom was it sold*/ - -/* MODULE 2 */ +SELECT DISTINCT vendor_id, product_id, customer_id +FROM customer_purchases +/* MODULE 2 */ /* INNER JOIN */ @@ -97,6 +150,16 @@ SELECT customer_id FROM customer_purchases ... 
use an INNER JOIN to see only products that have been purchased */ -- without table aliases +SELECT product_name, --coming from the product TABLE +vendor_id, -- rest are coming from customer purchases +market_date, +customer_id, +customer_purchases.product_id, +product.product_id + +FROM product +INNER JOIN customer_purchases + ON customer_purchases.product_id = product.product_id; @@ -107,24 +170,58 @@ SELECT customer_id FROM customer_purchases Add customers' first and last names with an INNER JOIN */ -- using table aliases - - -/* MODULE 2 */ +SELECT DISTINCT +vendor_id, +product_id, +c.customer_id, +customer_first_name, -- coming from customer +customer_last_name -- coming from customer + +FROM customer_purchases as cp +INNER JOIN customer as c + ON c.customer_id = cp.customer_id + +/* MODULE 2 */ /* LEFT JOIN */ /* 1. There are products that have been bought ... but are there products that have not been bought? Use a LEFT JOIN to find out*/ +SELECT DISTINCT +p.product_id +,cp.product_id as [cp.product_id] -- bracketed alias so the column header can contain a dot +,product_name +FROM product as p +LEFT JOIN customer_purchases as cp + ON p.product_id = cp.product_id; /* 2. Directions of LEFT JOINs matter ...*/ +-- only products that have been sold.. because there are no product ids in cp that aren't in product +SELECT DISTINCT +p.product_id +,cp.product_id as [cp.product_id] +,product_name + +FROM customer_purchases as p -- NOTE(review): aliases are swapped relative to the previous query (p is customer_purchases, cp is product) — confirm this is intended +LEFT JOIN product as cp + ON p.product_id = cp.product_id; /* 3. As do which values you filter on ... 
*/ +SELECT DISTINCT +p.product_id +,cp.product_id as [cp.product_id] +,product_name +FROM customer_purchases as p +LEFT JOIN product as cp + ON p.product_id = cp.product_id + +WHERE cp.product_id BETWEEN 1 AND 6; --if we pick product, 6 rows (1-6), otherwise 5 rows because zinnias have not been bought. NOTE(review): in this query cp aliases product and p aliases customer_purchases, so the row counts presumably describe the product-first join — confirm @@ -141,7 +238,12 @@ LEFT JOIN product AS p ...Note how the row count changed from 24 to 23 */ -/* MODULE 2 */ +SELECT * + +FROM product_category AS pc +RIGHT JOIN product AS p -- RIGHT JOIN keeps every product row, whether or not a category matches + ON pc.product_category_id = p.product_category_id + ORDER by pc.product_category_id/* MODULE 2 */ /* Multiple Table JOINs */ @@ -149,12 +251,38 @@ LEFT JOIN product AS p (Which vendor has sold products to a customer AND which product was it AND to whom was it sold) Replace all the IDs (customer, vendor, and product) with the names instead*/ - - - -/* 2. Select product_category_name, everything from the product table, and then LEFT JOIN the customer_purchases table +SELECT DISTINCT +--vendor_id +vendor_name +---,product_id +,product_name + --,customer_id -- first/last name + ,customer_first_name + ,customer_last_name + + +FROM customer_purchases as cp +INNER JOIN customer as c + ON c.customer_id = cp.customer_id +INNER JOIN vendor as v + ON v.vendor_id = cp.vendor_id +INNER JOIN product as p + ON p.product_id = cp.product_id; + + + +/* 2. Select product_category_name, everything from the product table, +and then LEFT JOIN the customer_purchases table ... how does this LEFT JOIN affect the number of rows? Why do we have more rows now?*/ - +SELECT product_category_name, p.*, cp.product_id as productid_in_purchases_table + +FROM product_category as pc +INNER JOIN product as p + ON p.product_category_id = pc.product_category_id +LEFT JOIN customer_purchases as cp -- one output row per matching purchase, so a product can appear many times + ON cp. product_id = p.product_id + +ORDER BY cp.product_id
diff --git a/04_this_cohort/live_code/DC/module_3/module_3.sqbpro b/04_this_cohort/live_code/DC/module_3/module_3.sqbpro index 3d421003d..aae1a6809 100644 --- a/04_this_cohort/live_code/DC/module_3/module_3.sqbpro +++ b/04_this_cohort/live_code/DC/module_3/module_3.sqbpro @@ -1,124 +1,236 @@ -
/* MODULE 3 */ +
/* MODULE 3 */ /* COUNT */ /* 1. Count the number of products */ - + SELECT count(product_id) as num_of_product + FROM product; /* 2. How many products per product_qty_type */ - +SELECT product_qty_type, COUNT(product_id) as num_of_product +FROM product +GROUP BY product_qty_type; -- terminate the statement so the next query in this tab can run /* 3. How many products per product_qty_type and per their product_size */ +SELECT product_size +,product_qty_type +, COUNT(product_id) as num_of_product +FROM product +GROUP BY product_size, product_qty_type - +ORDER BY product_qty_type; /* COUNT DISTINCT 4. How many unique products were bought */ +SELECT COUNT(DISTINCT product_id) as bought_prods +FROM customer_purchases - -/* MODULE 3 */ +/* MODULE 3 */ /* SUM & AVG */ -/* 1. How much did customers spend each day */ +/* 1. How much did customers spend each (per) day */ +SELECT +market_date +,customer_id +,SUM(quantity*cost_to_customer_per_qty) as total_spend +FROM customer_purchases +GROUP BY market_date +,customer_id; /* 2. How much does each customer spend on average */ +SELECT +customer_first_name +,customer_last_name +,ROUND(AVG(quantity*cost_to_customer_per_qty),2) as avg_spend + +FROM customer_purchases as cp +INNER JOIN customer as c + ON c.customer_id = cp.customer_id + +GROUP BY c.customer_id -- one row per customer; the names are selected as ungrouped columns -/* MODULE 3 */ +/* MODULE 3 */ /* MIN & MAX */ /* 1. What is the most expensive product ...pay attention to how it doesn't handle ties very well */ + SELECT product_name, max(original_price) as most_expensive + FROM vendor_inventory as vi + INNER JOIN product as p + ON p.product_id = vi.product_id; /* 2. Prove that max is working */ +SELECT DISTINCT +product_name, +original_price + + FROM vendor_inventory as vi + INNER JOIN product as p + ON p.product_id = vi.product_id; /* 3. Find the minimum price per each product_qty_type */ +SELECT product_name +,product_qty_type +,min(original_price) -- NOTE(review): product_name is not grouped; SQLite presumably returns it from the row holding the minimum — confirm +FROM vendor_inventory as vi +INNER JOIN product as p + ON p.product_id = vi.product_id + +GROUP BY product_qty_type; /* 4. 
Prove that min is working */ +SELECT DISTINCT product_name +,product_qty_type +---,min(original_price) +,original_price +FROM vendor_inventory as vi +INNER JOIN product as p + ON p.product_id = vi.product_id; /* 5. Min/max on a string ... not particularly useful? */ +SELECT max(product_name) +FROM product - -/* MODULE 3 */ +/* MODULE 3 */ /* Arithmitic */ /* 1. power, pi(), ceiling, division, integer division, etc */ -SELECT +SELECT power(4,2), pi(); +SELECT 10.0 / 3.0 as division, +CAST(10.0 as INT) / CAST(3.0 as INT) as integer_division; -- both operands are integers after the CAST /* 2. Every even vendor_id with modulo */ - +SELECT * FROM vendor +WHERE vendor_id % 2 = 0; /* 3. What about every third? */ - -/* MODULE 3 */ +SELECT * FROM vendor +WHERE vendor_id % 3 = 1 -- keeps ids whose remainder is 1 (1, 4, 7, ...) +/* MODULE 3 */ /* HAVING */ /* 1. How much did a customer spend on each day? Filter to customer_id between 1 and 5 and total_cost > 50 ... What order of execution occurs?*/ + SELECT + market_date + ,customer_id + , SUM(quantity*cost_to_customer_per_qty) as total_spend + FROM customer_purchases + WHERE customer_id BETWEEN 1 AND 5 + + GROUP BY market_date, customer_id + HAVING total_spend > 50; -- HAVING filters the grouped totals; the WHERE above filters rows before grouping + + /* 2. How many products were bought? Filter to number of purchases between 300 and 500 */ +SELECT count(product_id) as num_of_prod, product_id +FROM customer_purchases +GROUP BY product_id +HAVING count(product_id) BETWEEN 300 AND 500 -/* MODULE 3 */ + +/* MODULE 3 */ /* Subquery FROM */ /*1. Simple subquery in a FROM statement, e.g. for inflation ...we could imagine joining this to a more complex query perhaps */ +SELECT DISTINCT +product_id, inflation +FROM ( + SELECT product_id, cost_to_customer_per_qty, + CASE WHEN cost_to_customer_per_qty <'1.00' THEN cost_to_customer_per_qty*5 + ELSE cost_to_customer_per_qty END as inflation -- prices under 1.00 are multiplied by 5, others pass through unchanged + + FROM customer_purchases -/* 2. What is the single item that has been bought in the greatest quantity?*/ +); -/* MODULE 3 */ + +/* 2. 
What is the single item that has been bought in the greatest quantity?*/ +SELECT product_name, MAX(quantity_purchased) -- NOTE(review): product_name is an ungrouped column next to MAX(); presumably SQLite takes it from the row holding the maximum — confirm + +FROM product as p +INNER JOIN ( +-- inner QUERY + SELECT product_id + ,SUM(quantity) as quantity_purchased -- total units bought per product; count(quantity) only counted purchase rows + + FROM customer_purchases + GROUP BY product_id + +)AS x ON p.product_id = x.product_id +/* MODULE 3 */ /* Subquery WHERE */ /* 1. How much did each customer spend at each vendor for each day at the market WHEN IT RAINS */ +SELECT +market_date, +customer_id, +vendor_id, +SUM(quantity*cost_to_customer_per_qty) as total_spend +FROM customer_purchases +--filter by rain_flag +-- when it was raining +WHERE market_date IN( -/* 2. What is the name of the vendor who sells pie */ + SELECT market_date + FROM market_date_info + WHERE market_rain_flag = 1 -/* MODULE 3 */ -/* Common Table Expression (CTE) */ +) -/* 1. Calculate sales per vendor per day */ -SELECT +GROUP BY market_date, vendor_id, customer_id; +/* 2. What is the name of the vendor who sells pie */ +SELECT DISTINCT vendor_name +FROM customer_purchases as cp +INNER JOIN vendor as v + ON cp. vendor_id = v.vendor_id +WHERE product_id IN ( -/* ... re-aggregate the daily sales for each WEEK instead now */ - -/* MODULE 3 */ + SELECT product_id + FROM product + WHERE product_name LIKE '%pie%' +) +/* MODULE 3 */ /* Temp Tables */ @@ -133,14 +245,59 @@ DROP TABLE IF EXISTS temp.new_vendor_inventory; --make the table CREATE TABLE temp.new_vendor_inventory AS --- definition of the table +-- definition of the table +SELECT *, +original_price*5 as inflation +FROM vendor_inventory; /* 2. put the previous table into another temp table, e.g. as temp.new_new_vendor_inventory */ +DROP TABLE IF EXISTS temp.new_new_vendor_inventory; +CREATE TABLE temp.new_new_vendor_inventory AS + +SELECT * +,inflation*2 as super_inflation +FROM temp.new_vendor_inventory + +/* MODULE 3 */ +/* Common Table Expression (CTE) */ + + +/* 1. 
Calculate sales per vendor per day */ +WITH vendor_daily_sales AS( + SELECT + md.market_date + ,market_day + ,market_week + ,market_year + ,vendor_name + ,SUM(quantity*cost_to_customer_per_qty) as sales + + + FROM customer_purchases cp + INNER JOIN vendor v----- we want the vendor_name + ON v.vendor_id = cp.vendor_id + INNER JOIN market_date_info md + ON cp.market_date = md. market_date + + GROUP BY md.market_date, v.vendor_id -- one row per market date and vendor + +) + + + +/* ... re-aggregate the daily sales for each WEEK instead now */ +SELECT +market_year +,market_week +,vendor_name +,SUM(sales) as sales +FROM vendor_daily_sales -- reads the daily totals produced by the CTE above +GROUP BY market_year, market_week, vendor_name /* MODULE 3 */ /* Date functions */ @@ -163,4 +320,4 @@ SELECT b. number of YEARS between now and market_date c. number of HOURS bewtween now and market_date */ -
+
diff --git a/05_src/sql/farmersmarket.db b/05_src/sql/farmersmarket.db index 4720f2483..20927fb0d 100644 Binary files a/05_src/sql/farmersmarket.db and b/05_src/sql/farmersmarket.db differ