library(dplyr)
https://www.britannica.com/topic/United-States-Presidential-Election-Results-1788863
Sources: Electoral and popular vote totals based on data from the Office of the Clerk of the U.S. House of Representatives; the United States Office of the Federal Register; the Federal Election Commission; Congressional Quarterly’s Guide to U.S. Elections, 4th ed. (2001); and the official certified state vote totals.
url99 <- 'https://www.britannica.com/topic/United-States-Presidential-Election-Results-1788863'
ecs <- url99 |>
xml2::read_html() |>
rvest::html_node(
xpath = '//*[@class="md-raw-html"]/div/table/tbody') |>
rvest::html_table(fill = TRUE)
colnames(ecs) <- c('year',
'candidate',
'party',
'ec_votes',
'pop_votes',
'pop_per')
fns <- data.frame(year = c(1789,
1800,
1800,
1824,
2000),
candidate = c('George Washington',
'Thomas Jefferson',
'Aaron Burr',
'John Quincy Adams',
'Al Gore'),
z = 1)
pres_results <- ecs |>
mutate(pop_votes = as.integer(gsub(',', '', pop_votes)),
pop_per = as.numeric(pop_per),
candidate = gsub('[0-9]', '', candidate) |> trimws(),
candidate = gsub('George Bush$', 'George H.W. Bush', candidate),
# Horace Greeley
party = gsub('/Liberal Republican', '', party),
party = gsub('[0-9]', '', party),
party = gsub('no distinct party designations|no formally organized parties',
'',
party),
party = trimws(party)) |>
left_join(fns) |>
mutate(ec_votes = ifelse(!is.na(z),
gsub('.$', '', ec_votes),
ec_votes),
ec_votes = ifelse(candidate == 'Al Gore',
gsub('.$', '', ec_votes),
ec_votes),
ec_votes = as.integer(ec_votes)) |>
select(-z) |>
group_by(year) |>
mutate(ec_total = sum(ec_votes, na.rm = T)) |> ungroup() |>
filter(!party %in% 'not a candidate',
!candidate %in% 'not voted')
https://dataverse.harvard.edu/dataset.xhtml?persistentId=doi:10.7910/DVN/VOQCHQ
library(dplyr)
setwd(dataraw_dir)
county <- read.csv('countypres_2000-2020.csv')
pres_by_county <- county |>
filter(!is.na(candidatevotes)) |>
group_by_at(vars(all_of(colnames(county)[c(1:8,10)]))) |>
summarize(candidatevotes = sum(candidatevotes),
totalvotes = mean(totalvotes)) |> ungroup() |>
mutate(per = round(candidatevotes/totalvotes*100, 1)) |>
mutate(party = tolower(party),
#party = stringr::str_to_title(tolower(party)),
county_name = stringr::str_to_title(tolower(county_name))) |>
group_by(year, state_po, county_name, county_fips) |>
mutate(winner = candidate[which.max(candidatevotes)],
winner = stringr::str_to_title(tolower(winner)),
party_win = party[which.max(candidatevotes)]) |>
ungroup() |>
select(-candidate, -candidatevotes) |>
filter(party %in% c('republican', 'democrat')) |>
tidyr::spread(party, per) |>
rename(state_abbrev = state_po)|>
mutate(GEOID = stringr::str_pad(county_fips, 5, pad = "0")) |>
select(1, 3, 4, 12, 8:11)
Per 2022 district boundaries
uro <- 'https://docs.google.com/spreadsheets/d/1CKngqOp8fzU22JOlypoxNsxL6KSAH920Whc-rd7ebuM/edit?skip_itp2_check=true&pli=1#gid=1871835782'
pres_by_cd00 <- gsheet::gsheet2tbl(uro) |>
janitor::clean_names() |>
mutate(winner = ifelse(biden > trump,
'Joe Biden',
'Donald Trump'),
party_win = ifelse(biden > trump,
'democrat',
'republican'),
house_rep_party = ifelse(party == '(D)',
'democrat',
'republican')) |>
rename(democrat = biden,
republican = trump,
house_rep = incumbent) |>
tidyr::separate(district, into = c('state_abbrev', 'district_code'), sep = '-') |>
mutate(district_code = gsub('AL', '00', district_code)) |>
select(state_abbrev, district_code,
house_rep, house_rep_party,
winner, party_win,
democrat, republican) |>
mutate(last = gsub('^.* ', '', house_rep) |> toupper(),
nn_kos = house_rep |> tolower() |> trimws()) |>
group_by(last) |>
mutate(last_n = n()) |> ungroup()
hm1 <- Rvoteview::download_metadata(type = 'members',
chamber = 'house',
congress = '118') |>
mutate(bioname = stringi::stri_trans_general(bioname, id = "Latin-ASCII"),
nn_vv = gsub(', Jr\\.', '', bioname),
nn_vv = gsub('(.*)(, )(.*$)', '\\3 \\1', nn_vv),
nn_vv = stringr::str_to_title(nn_vv),
nn_vv = gsub(' [A-Z]\\.', '', nn_vv) |> tolower() |> trimws(),
last = gsub(',.*$', '', bioname) |> trimws()) |>
group_by(last) |>
mutate(last_n = n()) |> ungroup()
## [1] "/tmp/RtmpeLbOa7/H118_members.csv"
pbd <- pres_by_cd00 |>
left_join(hm1 |> filter(last_n == 1) |> select(nn_vv, last),
by = 'last') |>
select(nn_kos, nn_vv)
kos <- pbd |> filter(is.na(nn_vv))
strReverse <- function(x) sapply(lapply(strsplit(x, NULL), rev), paste, collapse="")
get_closest <- function(x, y) {
y[stringdist::stringdist(x |> strReverse(),
hm1$nn_vv |> strReverse(),
method = "jw") |> which.min()]
}
pbd0 <- pbd |>
rowwise() |>
mutate(nn_vv = ifelse(is.na(nn_vv),
get_closest(x = nn_kos, y = hm1$nn_vv),
nn_vv))
pres_by_cd <- pbd0 |>
mutate(nn_vv = ifelse(nn_kos == 'chris smith', 'christopher henry smith', nn_vv),
nn_vv = ifelse(nn_kos == 'sanford bishop', 'sanford dixon bishop', nn_vv),
nn_vv = ifelse(nn_kos == 'hal rogers', 'harold dallas (hal) rogers', nn_vv)) |>
left_join(hm1 |> select(nn_vv, icpsr)) |>
inner_join(pres_by_cd00) |>
select(3:11)
Eg:
https://en.wikipedia.org/wiki/United_States_presidential_elections_in_Virginia
options(tigris_use_cache = TRUE, tigris_class = "sf")
states_full <- tigris::states(cb = TRUE) |>
data.frame() |>
select(NAME, STATEFP, STUSPS) |>
rename(state_abbrev = STUSPS)
###
w3a <- c('New Mexico',
'Alaska',
'Arizona',
'District of Columbia',
'Hawaii')
w3b <- c('New York')
w3c <- c('Pennsylvania')
w3d <- c('California')
w5 <- c('Nebraska')
w4a <- c('Maine')
w4b <- c('Utah', 'Wyoming')
w6 <- c('Arkansas', 'Florida')
states <- states_full |>
mutate (l1 = case_when (NAME %in% c(w3a, w6, w4b) ~ list(list(1,3,4,5)), #124
!NAME %in% c(w3a, w3c, w3d, w6, w4b) ~ list(list(1,2,3,4)),
NAME %in% w3c ~ list(list(1,5,6,6)),
NAME %in% w3d ~ list(list(1,2,5,6))),
l2 = case_when (NAME %in% c(w3a, w6, w4b) ~ list(list(1,7,8, 9)),
!NAME %in% c(w3a, w3c, w3d, w6, w4b) ~ list(list(1,5,6,7)),
NAME %in% w3c ~ list(list(1,9,10,10)),
NAME %in% w3d ~ list(list(1,8,11,12))),
l3 = case_when (NAME %in% c(w3a, w6, w4b) ~ list(list(1,11, 12, 13)),
!NAME %in% c(w3a, w3c, w3d, w6, w4b) ~ list(list(1,8, 9, 10)),
NAME %in% w3c ~ list(list(1,1,1,1)),
NAME %in% w3d ~ list(list(1,1,1,1))
),
ns = case_when (NAME %in% c(w3a, w3b, w3c, w3d) ~ 3,
NAME %in% c(w4a, w4b) ~ 4,
NAME %in% c(w5) ~ 5,
NAME %in% c(w6) ~ 6,
!NAME %in% c(w3a, w3b, w3c, w3d, w4a, w4b, w5, w6) ~ 2)
) |>
mutate(ns = ns) |>
filter(!STATEFP %in% c('78', '69', '66', '72', '60')) |>
mutate(NAME = gsub('Washington', 'Washington (state)', NAME))
base_url <-
'https://en.wikipedia.org/wiki/United_States_presidential_elections_in_'
states_correct <- list()
for (i in 1:nrow(states)) {
states_correct[[i]] <-
paste0(base_url, gsub(' ', '_', states$NAME[i])) |>
xml2::read_html() |>
rvest::html_node(
xpath = paste0('//*[@id="mw-content-text"]/div/table[',
states$ns[i],']')) |>
rvest::html_table(fill = TRUE)
if(nrow(states_correct[[i]]) > 2){
x <- states_correct[[i]][, states$l1[i] |> unlist()]
colnames(x) <- c('year', 'candidate', 'votes', 'vote_share')
y <- states_correct[[i]][, states$l2[i] |> unlist()]
colnames(y) <- c('year', 'candidate', 'votes', 'vote_share')
z <- states_correct[[i]][, states$l3[i] |> unlist()]
colnames(z) <- c('year', 'candidate', 'votes', 'vote_share')
states_correct[[i]] <- rbind(x, y, z) |>
mutate(year = gsub("\\D+", "", year),
year = as.integer(substr(year, 1,4)),
vote_share = as.character(vote_share)
)
} else(states_correct[[i]] <- states_correct[[i]])
}
names(states_correct) <- states$NAME
states_correct1 <- states_correct |>
bind_rows(.id = 'state_name') |>
mutate(votes = as.integer(gsub('\\(.*\\)|,', '', votes))) |>
mutate(vote_share = gsub('(^.*\\()(.*)(\\)$)', '\\2', vote_share),
vote_share =as.numeric(gsub('%', '', vote_share)),
# wiki_party = gsub('(^.*\\()([A-Za-z.]*)(\\)$)', '\\2', candidate),
candidate = gsub('\\(.*\\)|\\[.*\\]', '', candidate) |> trimws()) |>
filter(candidate != 'TBD',
nchar(candidate) > 5,
!is.na(year)) |>
## correct hubert humphrey in florida 1972
mutate(candidate = ifelse(state_name == 'Florida' & year == 1972 & candidate == 'Hubert Humphrey',
'George McGovern', candidate))
brits <- unique(pres_results$candidate)
wikis <- unique(states_correct1$candidate)
## setdiff(brits, wikis)
nnb <- sapply(wikis, function(x){
brits[adist(x, brits) |> which.min()]
})
xx <- data.frame(wiki = nnb |> names() |> trimws(),
brit = nnb |> unname() |> trimws()) |>
unique()
ms <- c('Adlai Stevenson II',
'Al Smith',
'Charles Pinckney')
cross <- xx |>
# mutate(brit = ifelse(wiki %in% ms, NA, brit)) |>
mutate(wiki = ifelse(brit == 'William H. Crawford', 'William H. Crawford', wiki)) |>
mutate(brit = ifelse(wiki == 'John Q. Adams', 'John Quincy Adams', brit)) |>
mutate(brit = ifelse(wiki == 'Charles Pinckney', 'Charles Cotesworth Pinckney', brit)) |>
unique() |> na.omit()
pres_by_state <- states_correct1 |>
mutate(vote_share = ifelse(year %in% c('1864', '1868') &
vote_share == 0,
NA, vote_share),
candidate = trimws(candidate)) |>
## na.omit() |>
left_join(cross, by = c('candidate' = 'wiki')) |>
left_join(pres_results |> select(year:party) |>
mutate(party = ifelse(party %in% 'Democratic', 'Democrat', party)),
by = c('brit' = 'candidate',
'year' = 'year')) |>
mutate(party = ifelse(candidate %in% c('Al Smith', 'Adlai Stevenson II'),
'Democrat', party)) |>
rename(NAME = state_name) |>
left_join(states) |>
select(year, state_abbrev, brit, party, votes:vote_share) |>
rename(candidate = brit) |>
filter(year >= 1864, !is.na(vote_share)) |>
select(-votes) |>
unique() |>
mutate(party = ifelse(!party %in% c('Democrat', 'Republican'),
'Other', party)) |>
group_by(year, state_abbrev, party) |>
filter(vote_share == max(vote_share)) |>
ungroup() |>
group_by(state_abbrev, year) |>
mutate(winner = candidate[which.max(vote_share)],
#party = tolower(party),
party_win = tolower(party[which.max(vote_share)])) |>
ungroup() |>
select(-candidate) |>
unique() |>
tidyr::spread(party, vote_share)
colnames(pres_by_state) <- colnames(pres_by_state) |> tolower()
https://docs.google.com/spreadsheets/d/1LrBXlqrtSZwyYOkpEEXFwQggvtR0bHHTxs9kq4kjOjw/edit#gid=0
fnames <- c('HexCDv30wm',
'HexSTv30wm',
'TileInv10',
'TileOutv10')
sfs <- lapply(fnames, function(x) {
sf::st_read(dsn = paste0(dataraw_dir, x),
layer = x,
quiet = TRUE) })
names(sfs) <- fnames
xsf_HexCDv30wm <- sfs$HexCDv30 |>
rename(state_abbrev = STATEAB,
state = STATENAME) |>
mutate(district_code = gsub('[A-Z][A-Z]', 0, CDLABEL),
district_code = stringr::str_pad (district_code, 2, pad = 0)) |>
select(GEOID, state, state_abbrev, district_code)
xsf_HexSTv30wm <- sfs$HexSTv30wm |>
rename(state_abbrev = STATEAB, state = STATENAME)
xsf_TileInv10 <- sfs$TileInv10 %>% select(5:7) %>%
rename(state_abbrev = State, state = StateName)
xsf_TileOutv10 <- sfs$TileOutv10 %>% select(5:7) %>%
rename(state_abbrev = State, state = StateName)
xsf_HexCDv30wm |> head()
## Simple feature collection with 6 features and 4 fields
## Geometry type: MULTIPOLYGON
## Dimension: XY
## Bounding box: xmin: -13819550 ymin: 3690450 xmax: -12273940 ymax: 5728412
## Projected CRS: Mercator_2SP
## GEOID state state_abbrev district_code geometry
## 1 0402 Arizona AZ 02 MULTIPOLYGON (((-12305045 3...
## 2 0405 Arizona AZ 05 MULTIPOLYGON (((-12305045 3...
## 3 0606 California CA 06 MULTIPOLYGON (((-13696151 5...
## 4 0608 California CA 08 MULTIPOLYGON (((-13588284 5...
## 5 0610 California CA 10 MULTIPOLYGON (((-13694006 5...
## 6 0614 California CA 14 MULTIPOLYGON (((-13685745 5...
https://fred.stlouisfed.org/release/tables?rid=118&eid=259194&od=#
freds <- lapply(1900:2022, function(x){
ur <- paste0('https://fred.stlouisfed.org/release/tables?rid=118&eid=259194&od=', x, '-01-01#')
ur |>
xml2::read_html() |>
rvest::html_node(
xpath = '//*[@class="table-responsive"]/table/tbody') |>
rvest::html_table(fill = TRUE) |>
mutate(year = x,
NAME = X2) |>
rowwise() |>
mutate(population = ifelse(x < 1950,
gsub('\\.|,', '', X4) |> as.integer(),
gsub('\\.|,', '', X3) |> as.integer())) |>
ungroup() |>
select(year, NAME, population)
})
fred_pop_by_state <- freds |>
bind_rows() |>
#filter(!is.na(population)) |>
left_join(states_full) |>
select(1,5,2, 3)
fred_pop_by_state |>
head()|>
knitr::kable()
setwd(data_dir)
#usethis::use_data(pres_by_state, overwrite=TRUE)
usethis::use_data(pres_by_cd, overwrite=TRUE)
usethis::use_data(pres_by_county, overwrite=TRUE)
usethis::use_data(pres_results, overwrite=TRUE)
## usethis::use_data(fred_pop_by_state, overwrite=TRUE)
usethis::use_data(xsf_HexCDv30wm, overwrite=TRUE)
usethis::use_data(xsf_HexSTv30wm, overwrite=TRUE)
usethis::use_data(xsf_TileInv10, overwrite=TRUE)
usethis::use_data(xsf_TileOutv10, overwrite=TRUE)