@@ -255,8 +255,8 @@ saveRDS(avg_smoke_density, "smoke_density_avg_byZip.R")
 # # A tibble: 6 × 4
 #   ZCTA5CE10 avg_light avg_medium avg_heavy
 #   <fct>         <dbl>      <dbl>     <dbl>
-# 1 97833         0.129     0.194     0.419 
-# 2 97840         0.161     0.226     0.387 
+# 1 97833         0.129     0.194     0.419
+# 2 97840         0.161     0.226     0.387
 # 3 97330         0.290     0.129     0.0323
 # 4 97004         0.258     0.0968    0.0323
 # 5 97023         0.194     0.0968    0.0323
@@ -299,27 +299,27 @@ subset_data <- or_sedd_2021 %>%
   select(FEMALE, ZIP, PSTCO, AGE, RACE, AMONTH, starts_with("I10_"))
 names(subset_data)
 
-#  [1] "FEMALE"               "ZIP"                  "PSTCO"               
-#  [4] "AGE"                  "RACE"                 "AMONTH"              
+#  [1] "FEMALE"               "ZIP"                  "PSTCO"
+#  [4] "AGE"                  "RACE"                 "AMONTH"
 #  [7] "I10_DX_Visit_Reason1" "I10_DX_Visit_Reason2" "I10_DX_Visit_Reason3"
-# [10] "I10_DX1"              "I10_DX2"              "I10_DX3"             
-# [13] "I10_DX4"              "I10_DX5"              "I10_DX6"             
-# [16] "I10_DX7"              "I10_DX8"              "I10_DX9"             
-# [19] "I10_DX10"             "I10_DX11"             "I10_DX12"            
-# [22] "I10_DX13"             "I10_DX14"             "I10_DX15"            
-# [25] "I10_DX16"             "I10_DX17"             "I10_DX18"            
-# [28] "I10_DX19"             "I10_DX20"             "I10_DX21"            
-# [31] "I10_DX22"             "I10_DX23"             "I10_DX24"            
-# [34] "I10_DX25"             "I10_DX26"             "I10_DX27"            
-# [37] "I10_DX28"             "I10_NDX"              "I10_PROCTYPE"        
+# [10] "I10_DX1"              "I10_DX2"              "I10_DX3"
+# [13] "I10_DX4"              "I10_DX5"              "I10_DX6"
+# [16] "I10_DX7"              "I10_DX8"              "I10_DX9"
+# [19] "I10_DX10"             "I10_DX11"             "I10_DX12"
+# [22] "I10_DX13"             "I10_DX14"             "I10_DX15"
+# [25] "I10_DX16"             "I10_DX17"             "I10_DX18"
+# [28] "I10_DX19"             "I10_DX20"             "I10_DX21"
+# [31] "I10_DX22"             "I10_DX23"             "I10_DX24"
+# [34] "I10_DX25"             "I10_DX26"             "I10_DX27"
+# [37] "I10_DX28"             "I10_NDX"              "I10_PROCTYPE"
 
 ```
 
 Next, we will select July as our month of interest, both to further reduce the size of the data and to focus on a time frame when we know fires took place in Oregon. We will also load in the environmental data files we made above with amadeus.
 
 ``` {r eval=FALSE}
 # subset data to July
-july_subset_hcup_data <- subset_data[subset_data$AMONTH == 7,]
+july_subset_hcup_data <- subset_data[subset_data$AMONTH == 7, ]
 
 # load in amadeus files we made previously
 avg_smoke_density <- readRDS("smoke_density_avg_byZip.R")
@@ -331,13 +331,13 @@ total_smoke_density <- readRDS("smoke_density_total_byZip.R")
 We will now merge our environmental data into our hospital discharge (HCUP) data using an inner join on ZIP codes present in both datasets.
 
 ``` {r eval=FALSE}
-# Perform an inner join to merge `july_subset_hcup_data` with 
-# `avg_smoke_density` based on the ZIP code (`ZIP` in HCUP data and 
+# Perform an inner join to merge `july_subset_hcup_data` with
+# `avg_smoke_density` based on the ZIP code (`ZIP` in HCUP data and
 # `ZCTA5CE10` in smoke density data)
 merged_data <- inner_join(july_subset_hcup_data, avg_smoke_density,
                           by = c("ZIP" = "ZCTA5CE10"))
 
-# Perform another inner join to add `total_smoke_density` to the existing 
+# Perform another inner join to add `total_smoke_density` to the existing
 # `merged_data`
 merged_data <- inner_join(merged_data, total_smoke_density,
                           by = c("ZIP" = "ZCTA5CE10"))
@@ -391,30 +391,30 @@ Finally, we fit a logistic regression model to examine the relationship between
 ``` {r eval=FALSE}
 # Fit a logistic regression model with asthma diagnosis as the outcome variable
 # and different smoke exposure levels as predictors
-model <- glm(has_asthma ~ avg_light + avg_medium + avg_heavy, 
+model <- glm(has_asthma ~ avg_light + avg_medium + avg_heavy,
              data = smoke_summary, family = binomial)
 
 # Display model summary
 summary(model)
 # Call:
-# glm(formula = has_asthma ~ avg_light + avg_medium + avg_heavy, 
+# glm(formula = has_asthma ~ avg_light + avg_medium + avg_heavy,
 #     family = binomial, data = smoke_summary)
-# 
+#
 # Coefficients:
-#             Estimate Std. Error z value Pr(>|z|)    
+#             Estimate Std. Error z value Pr(>|z|)
 # (Intercept) -3.38823    0.09077 -37.329  < 2e-16 ***
-# avg_light   -0.21258    0.30322  -0.701    0.483    
+# avg_light   -0.21258    0.30322  -0.701    0.483
 # avg_medium   1.74996    0.32456   5.392 6.98e-08 ***
 # avg_heavy    1.82572    0.16826  10.850  < 2e-16 ***
 # ---
 # Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
-# 
+#
 # (Dispersion parameter for binomial family taken to be 1)
-# 
+#
 #     Null deviance: 42004  on 111124  degrees of freedom
 # Residual deviance: 41674  on 111121  degrees of freedom
 # AIC: 41682
-# 
+#
 # Number of Fisher Scoring iterations: 6
 ```
420420
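The coefficients from `summary(model)` are on the log-odds scale. As a quick aid for reading them, here is a minimal follow-up sketch (assuming the fitted `model` object from the chunk above is still in the session) that converts the estimates to odds ratios with profile-likelihood 95% confidence intervals:

``` {r eval=FALSE}
# Exponentiate the log-odds coefficients to obtain odds ratios, with
# profile-likelihood 95% confidence intervals
# (assumes the `model` object fit in the previous chunk)
odds_ratios <- exp(cbind(OR = coef(model), confint(model)))
round(odds_ratios, 3)
```

On this reading, exp(1.826) ≈ 6.2 for `avg_heavy` would correspond to roughly six-fold higher odds of an asthma diagnosis per unit increase in average heavy-smoke density, while the estimate for `avg_light` is not distinguishable from no effect (p = 0.483).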