diff --git a/02_activities/assignments/DC_Cohort/Assignment1.md b/02_activities/assignments/DC_Cohort/Assignment1.md index f78778f5b..d035abc54 100644 --- a/02_activities/assignments/DC_Cohort/Assignment1.md +++ b/02_activities/assignments/DC_Cohort/Assignment1.md @@ -205,5 +205,10 @@ Consider, for example, concepts of fariness, inequality, social structures, marg ``` -Your thoughts... +My thoughts on social identity: I suppose databases serve the purpose of binning real-life complexities into distinct categories and binaries, and, by virtue of that, may not reflect reality. +Something I often notice on different forms (for the eventual purpose of data collection) is racial identity, which can become complicated for +diaspora communities and because man-made geographical/geopolitical borders change over time. +My thoughts on data use: Often, to get different tasks done (medical, government services), +we have to consent to the collection of our data, with general descriptions of its future use, but we do not know for certain what will be queried from our data/ +demographics in the future. ``` diff --git a/02_activities/assignments/DC_Cohort/assignment1.sql b/02_activities/assignments/DC_Cohort/assignment1.sql index c992e3205..920439274 100644 --- a/02_activities/assignments/DC_Cohort/assignment1.sql +++ b/02_activities/assignments/DC_Cohort/assignment1.sql @@ -1,21 +1,23 @@ /* ASSIGNMENT 1 */ /* SECTION 2 */ - --SELECT /* 1. Write a query that returns everything in the customer table. */ - - +SELECT * +FROM customer; /* 2. Write a query that displays all of the columns and 10 rows from the cus- tomer table, sorted by customer_last_name, then customer_first_ name. */ - - +SELECT * +FROM customer +ORDER BY customer_last_name, customer_first_name +LIMIT 10; --WHERE /* 1. Write a query that returns all customer purchases of product IDs 4 and 9. */ - - +SELECT * +FROM customer_purchases +WHERE product_id IN (4, 9); /*2. 
Write a query that returns all customer purchases and a new calculated column 'price' (quantity * cost_to_customer_per_qty), filtered by customer IDs between 8 and 10 (inclusive) using either: @@ -23,30 +25,57 @@ filtered by customer IDs between 8 and 10 (inclusive) using either: 2. one condition using BETWEEN */ -- option 1 - +SELECT * +, (quantity * cost_to_customer_per_qty) AS price +FROM customer_purchases +WHERE customer_id >= 8 AND customer_id <= 10; -- option 2 - - +SELECT * +, (quantity * cost_to_customer_per_qty) AS price +FROM customer_purchases +WHERE customer_id BETWEEN 8 AND 10; --CASE /* 1. Products can be sold by the individual unit or by bulk measures like lbs. or oz. Using the product table, write a query that outputs the product_id and product_name columns and add a column called prod_qty_type_condensed that displays the word “unit” if the product_qty_type is “unit,” and otherwise displays the word “bulk.” */ - +SELECT +product_id, +product_name, +CASE +WHEN product_qty_type = 'unit' THEN 'unit' +ELSE 'bulk' +END AS prod_qty_type_condensed +FROM product; /* 2. We want to flag all of the different types of pepper products that are sold at the market. add a column to the previous query called pepper_flag that outputs a 1 if the product_name contains the word “pepper” (regardless of capitalization), and otherwise outputs 0. */ - +SELECT +product_id, +product_name, +CASE +WHEN product_qty_type = 'unit' THEN 'unit' +ELSE 'bulk' +END AS prod_qty_type_condensed, /* same column name as the previous query, which this one extends */ +CASE +WHEN LOWER(product_name) LIKE '%pepper%' THEN 1 /* LOWER() makes the match independent of capitalization */ +ELSE 0 +END AS pepper_flag +FROM product; --JOIN /* 1. Write a query that INNER JOINs the vendor table to the vendor_booth_assignments table on the vendor_id field they both have in common, and sorts the result by vendor_name, then market_date. 
*/ +SELECT * +FROM vendor +INNER JOIN vendor_booth_assignments +ON vendor.vendor_id = vendor_booth_assignments.vendor_id +ORDER BY vendor_name, market_date; @@ -55,7 +84,11 @@ vendor_id field they both have in common, and sorts the result by vendor_name, t -- AGGREGATE /* 1. Write a query that determines how many times each vendor has rented a booth at the farmer’s market by counting the vendor booth assignments per vendor_id. */ - +SELECT +vendor_id +, COUNT(*) AS booth_rentals +FROM vendor_booth_assignments +GROUP BY vendor_id; /* 2. The Farmer’s Market Customer Appreciation Committee wants to give a bumper @@ -63,7 +96,17 @@ sticker to everyone who has ever spent more than $2000 at the market. Write a qu of customers for them to give stickers to, sorted by last name, then first name. HINT: This query requires you to join two tables, use an aggregate function, and use the HAVING keyword. */ - +SELECT +c.customer_id, +c.customer_first_name, +c.customer_last_name, +SUM(cp.quantity * cp.cost_to_customer_per_qty) AS total_spent +FROM customer AS c +INNER JOIN customer_purchases AS cp +ON c.customer_id = cp.customer_id +GROUP BY c.customer_id, c.customer_first_name, c.customer_last_name /* every non-aggregated SELECT column is grouped */ +HAVING SUM(cp.quantity * cp.cost_to_customer_per_qty) > 2000 /* aggregate repeated: a SELECT alias in HAVING is not portable SQL */ +ORDER BY c.customer_last_name, c.customer_first_name; --Temp Table @@ -78,19 +121,11 @@ When inserting the new vendor, you need to appropriately align the columns to be VALUES(col1,col2,col3,col4,col5) */ +CREATE TEMP TABLE new_vendor AS +SELECT * +FROM vendor; /* the CREATE statement must be terminated before the INSERT starts */ +INSERT INTO new_vendor (vendor_id, vendor_name, vendor_type, vendor_owner_first_name, vendor_owner_last_name) +VALUES (10, 'Thomass Superfood Store', 'Fresh Focused', 'Thomas', 'Rosenthal'); --- Date -/*1. Get the customer_id, month, and year (in separate columns) of every purchase in the customer_purchases table. - -HINT: you might need to search for strfrtime modifers sqlite on the web to know what the modifers for month -and year are! */ - - - -/* 2. 
Using the previous query as a base, determine how much money each customer spent in April 2022. -Remember that money spent is quantity*cost_to_customer_per_qty. - -HINTS: you will need to AGGREGATE, GROUP BY, and filter... -but remember, STRFTIME returns a STRING for your WHERE statement!! */ diff --git a/02_activities/assignments/DC_Cohort/naiyara_farmers_market_logical_data_model.drawio.pdf b/02_activities/assignments/DC_Cohort/naiyara_farmers_market_logical_data_model.drawio.pdf new file mode 100644 index 000000000..c8b7a147b Binary files /dev/null and b/02_activities/assignments/DC_Cohort/naiyara_farmers_market_logical_data_model.drawio.pdf differ diff --git a/04_this_cohort/live_code/DC/module_2/module_2.sqbpro b/04_this_cohort/live_code/DC/module_2/module_2.sqbpro index 73ace631b..c8d80157a 100644 --- a/04_this_cohort/live_code/DC/module_2/module_2.sqbpro +++ b/04_this_cohort/live_code/DC/module_2/module_2.sqbpro @@ -1,94 +1,138 @@ -
/* MODULE 2 */ +
/* MODULE 2 */ /* SELECT */ /* 1. Select everything in the customer table */ -SELECT +SELECT * +FROM customer; /* 2. Use sql as a calculator */ - +SELECT 1+1 as addition, 10*5 as multiplication, pi() as pie; /* 3. Add order by and limit clauses */ +SELECT * +FROM customer +ORDER BY customer_first_name +LIMIT 10; +/* 4. Select multiple specific columns */ +SELECT customer_id, customer_first_name +FROM customer; - -/* 4. Select multiple specific columns * / - - - -/* 5. Add a static value in a column *//* MODULE 2 */ +/* 5. Add a static value in a column */ +SELECT 2025 as this_year, 'October' as this_month, customer_id +FROM customer/* MODULE 2 */ /* WHERE */ /* 1. Select only customer 1 from the customer table */ -SELECT * +SELECT * FROM customer -WHERE +WHERE customer_id = 1; /* 2. Differentiate between AND and OR */ - +SELECT * +FROM customer +WHERE customer_id = 1 +OR customer_id = 2; /* 3. IN */ - - +SELECT * +FROM customer +WHERE customer_id IN (3,4,5) +OR customer_postal_code IN ('M4M', 'M1L'); /* NOTE(review): was 'MIL'; assumed to be a typo for 'M1L' (same letter-digit-letter shape as 'M4M') - confirm against the customer data */ /* 4. LIKE */ +SELECT * FROM product +WHERE product_name LIKE '%pepper%'; +/* 5. Nulls and Blanks*/ -/* 5. Nulls and Blanks* / +SELECT * FROM product +WHERE product_size IS NULL -- null +OR product_size = ''; -- blank +/* 6. BETWEEN x AND y */ +SELECT * +FROM customer +WHERE customer_id BETWEEN 1 AND 20; +--dates -/* 6. BETWEEN x AND y *//* MODULE 2 */ +--SELECT market_date +--FROM market_date_info +--WHERE market_date BETWEEN '2022-10-01' AND '2022-10-31'/* MODULE 2 */ /* CASE */ SELECT * /* 1. Add a CASE statement declaring which days vendors should come */ - +, CASE WHEN vendor_type = 'Fresh Focused' THEN 'Wednesday' + WHEN vendor_type = 'Prepared Foods' THEN 'Thursday' + ELSE 'Saturday' +END as day_of_specialty /* 2. Add another CASE statement for Pie Day */ - +, CASE WHEN vendor_name = "Annie's Pies" -- double quotes work here + THEN 'Annie is great' + END as pie_day /* 3. 
Add another CASE statement with an ELSE clause to handle rows evaluating to False */ +,CASE WHEN vendor_name LIKE '%pie%' + THEN 'Wednesday' + ELSE 'Friday' + END as also_pie_day -/* 4. Experiment with selecting a different column instead of just a string value */ +FROM vendor; -FROM vendor/* MODULE 2 */ +/* 4. Experiment with selecting a different column instead of just a string value */ +SELECT * +,CASE WHEN cost_to_customer_per_qty < 1.00 /* numeric literal, so the price is not compared with a text value */ +THEN cost_to_customer_per_qty*5 +ELSE cost_to_customer_per_qty +END AS inflation + +FROM customer_purchases/* MODULE 2 */ /* DISTINCT */ /* 1. Compare how many customer_ids are the customer_purchases table, one select with distinct, one without */ -- 4221 rows -SELECT customer_id FROM customer_purchases +SELECT customer_id FROM customer_purchases; +SELECT DISTINCT customer_id FROM customer_purchases; /* 2. Compare the difference between selecting market_day in market_date_info, with and without distinct: what do these difference mean?*/ - +SELECT market_day FROM market_date_info; +SELECT DISTINCT market_day FROM market_date_info; +-- market only open on two days /* 3. Which vendor has sold products to a customer */ +SELECT DISTINCT vendor_id /* one row per vendor instead of one row per purchase */ +FROM customer_purchases; - -/* 4. Which vendor has sold products to a customer ... and which product was it * / - +/* 4. Which vendor has sold products to a customer ... and which product was it */ +SELECT DISTINCT vendor_id, product_id +FROM customer_purchases; /* 5. Which vendor has sold products to a customer ... and which product was it? ... AND to whom was it sold*/ - +SELECT DISTINCT vendor_id, product_id, customer_id /* customer_id answers "to whom was it sold" */ +FROM customer_purchases /* MODULE 2 */ /* INNER JOIN */ @@ -97,7 +141,16 @@ SELECT customer_id FROM customer_purchases ... 
use an INNER JOIN to see only products that have been purchased */ -- without table aliases +SELECT product_name, -- coming from the product TABLE +vendor_id, -- rest are coming from customer_purchases +market_date, +customer_id, +customer_purchases.product_id, +product.product_id +FROM product +INNER JOIN customer_purchases + ON customer_purchases.product_id = product.product_id; @@ -107,6 +160,17 @@ SELECT customer_id FROM customer_purchases Add customers' first and last names with an INNER JOIN */ -- using table aliases +SELECT DISTINCT +vendor_id, +product_id, +c.customer_id, +customer_first_name, -- coming from customer +customer_last_name -- coming from customer + + +FROM customer_purchases as cp +INNER JOIN customer as c + ON c.customer_id = cp.customer_id /* MODULE 2 */ @@ -116,15 +180,51 @@ SELECT customer_id FROM customer_purchases /* 1. There are products that have been bought ... but are there products that have not been bought? Use a LEFT JOIN to find out*/ +SELECT DISTINCT +p.product_id +,cp.product_id as [cp.product_id] +,product_name + +FROM product as p +LEFT JOIN customer_purchases as cp + ON p.product_id = cp.product_id; + + +/* 2. Directions of LEFT JOINs matter ...*/ +-- only products that have been sold ... because there are no product ids in CP that ARENT in product +SELECT DISTINCT +p.product_id +,cp.product_id as [cp.product_id] +,product_name + +FROM customer_purchases as cp /* aliases match their tables: cp = customer_purchases, p = product */ +LEFT JOIN product as p + ON cp.product_id = p.product_id; /* 2. Directions of LEFT JOINs matter ...*/ +-- only products that have been sold ... because there are no product ids in CP that ARENT in product +SELECT DISTINCT +p.product_id +,cp.product_id as [cp.product_id] +,product_name +FROM customer_purchases as cp /* aliases match their tables: cp = customer_purchases, p = product */ +LEFT JOIN product as p + ON cp.product_id = p.product_id; +/* 3. As do which table's values you filter on ... */ +SELECT DISTINCT +p.product_id +,cp.product_id as [cp.product_id] +,product_name -/* 3. As do which values you filter on ... 
*/ +FROM product as p +LEFT JOIN customer_purchases as cp + ON p.product_id = cp.product_id +WHERE p.product_id BETWEEN 1 AND 6; -- if we pick product, 6 rows (1-6), otherwise 5 rows because zinnias not been bought @@ -141,7 +241,12 @@ LEFT JOIN product AS p ...Note how the row count changed from 24 to 23 */ -/* MODULE 2 */ +SELECT * + +FROM product AS p +LEFT JOIN product_category AS pc + ON pc.product_category_id = p.product_category_id + ORDER BY pc.product_category_id/* MODULE 2 */ /* Multiple Table JOINs */ @@ -149,12 +254,37 @@ LEFT JOIN product AS p (Which vendor has sold products to a customer AND which product was it AND to whom was it sold) Replace all the IDs (customer, vendor, and product) with the names instead*/ - - - -/* 2. Select product_category_name, everything from the product table, and then LEFT JOIN the customer_purchases table +SELECT DISTINCT +--v.vendor_id +vendor_name +--, product_id +,product_name +--,customer_id -- first/last name +,customer_first_name +,customer_last_name + +FROM customer_purchases as cp +INNER JOIN customer as c + ON c.customer_id = cp.customer_id +INNER JOIN vendor as v + ON v.vendor_id = cp.vendor_id +INNER JOIN product as p + ON p.product_id = cp.product_id; + + +/* 2. Select product_category_name, everything from the product table, +and then LEFT JOIN the customer_purchases table ... how does this LEFT JOIN affect the number of rows? Why do we have more rows now?*/ +SELECT product_category_name, p.*, cp.product_id as productid_in_purchases_table + +FROM product_category as pc +INNER JOIN product as p + ON p.product_category_id = pc.product_category_id +LEFT JOIN customer_purchases as cp + ON cp.product_id = p.product_id + +ORDER BY cp.product_id
diff --git a/04_this_cohort/live_code/DC/module_3/module_3.sqbpro b/04_this_cohort/live_code/DC/module_3/module_3.sqbpro index 3d421003d..9459f89d4 100644 --- a/04_this_cohort/live_code/DC/module_3/module_3.sqbpro +++ b/04_this_cohort/live_code/DC/module_3/module_3.sqbpro @@ -1,33 +1,57 @@ -
/* MODULE 3 */ +/* MODULE 3 */ /* COUNT */ /* 1. Count the number of products */ - +SELECT COUNT(product_id) AS num_of_product +FROM product; /* 2. How many products per product_qty_type */ - - +SELECT product_qty_type, COUNT(product_id) AS num_of_product +FROM product +GROUP BY product_qty_type; /* 3. How many products per product_qty_type and per their product_size */ +SELECT product_size +, product_qty_type +, COUNT(product_id) AS num_of_product +FROM product +GROUP BY product_size, product_qty_type - +ORDER BY product_qty_type; /* COUNT DISTINCT 4. How many unique products were bought */ +SELECT COUNT(DISTINCT product_id) AS bought_prods +FROM customer_purchases /* MODULE 3 */ /* SUM & AVG */ -/* 1. How much did customers spend each day */ +/* 1. How much did customers spend each (per) day */ +SELECT +market_date +, customer_id +, SUM(quantity * cost_to_customer_per_qty) AS total_spend /* one total per market_date and customer_id pair */ +FROM customer_purchases +GROUP BY market_date, customer_id; /* 2. How much does each customer spend on average */ +SELECT +customer_first_name +, customer_last_name +, ROUND(AVG(quantity * cost_to_customer_per_qty), 2) AS avg_spend /* mean over purchase rows, rounded to 2 decimals */ + +FROM customer_purchases AS cp +INNER JOIN customer AS c + ON c.customer_id = cp.customer_id +GROUP BY c.customer_id /* MODULE 3 */ /* MIN & MAX */ @@ -36,25 +60,47 @@ /* 1. What is the most expensive product ...pay attention to how it doesn't handle ties very well */ - +SELECT product_name, MAX(original_price) AS most_expensive -/* 2. Prove that max is working */ +FROM vendor_inventory AS vi +INNER JOIN product AS p + ON p.product_id = vi.product_id; + + /* 2. Prove that max is working */ +SELECT DISTINCT +product_name, +original_price +FROM vendor_inventory AS vi +INNER JOIN product AS p + ON p.product_id = vi.product_id; /* 3. 
Find the minimum price per each product_qty_type */ +SELECT product_name +,product_qty_type +,min(original_price) AS least_expensive /* named like most_expensive in the MAX query */ +FROM vendor_inventory as vi +INNER JOIN product as p + ON p.product_id = vi.product_id +GROUP BY product_qty_type; /* 4. Prove that min is working */ +SELECT DISTINCT product_name +,product_qty_type +--,min(original_price) +,original_price - +FROM vendor_inventory as vi +INNER JOIN product as p + ON p.product_id = vi.product_id; /* 5. Min/max on a string ... not particularly useful? */ - - -/* MODULE 3 */ +SELECT max(product_name) AS max_product_name +FROM product/* MODULE 3 */ /* Arithmitic */ @@ -163,4 +209,4 @@ SELECT b. number of YEARS between now and market_date c. number of HOURS bewtween now and market_date */ - +