-
Notifications
You must be signed in to change notification settings - Fork 0
/
geocodeData.R
50 lines (36 loc) · 1.09 KB
/
geocodeData.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
# Geocode the REGINA rows of the bike-collision data with ggmap.
library(ggmap)
library(plyr)
# Load the raw collision records. Strings stay as character (not factor) so
# the street columns can be pasted into lookup strings further down.
bc <- read.csv("./bike_collisions.csv", stringsAsFactors = FALSE)

# Keep only the rows whose COMMNAME is REGINA. The mask must be built from
# `bc` itself: `dd` is not assigned until the geocoding results are combined
# later in this script, so filtering on `dd$COMMNAME` fails in a fresh
# session. which() drops rows where COMMNAME is NA instead of yielding
# all-NA rows.
reg <- bc[which(bc$COMMNAME == "REGINA"), ]

# Quick interactive look at the first rows of the subset.
head(reg)

# Last few rows of the three address columns, kept for manual inspection.
addy <- tail(reg[, c("USTREET1", "USTREET2", "COMMNAME")])
# Build the free-text string that is sent to the geocoder for one record.
#
# Args:
#   df: a list (or one-row data frame) read positionally:
#       [[1]] first street field, [[2]] second street field,
#       [[3]] community name.
#
# Returns:
#   A length-1 character string.
#   * If the second field is a numeric range "<digits>-<digits>"
#     (presumably a block-number range; confirm against the data), the
#     result is "<midpoint> <street 1>, <community>, Saskatchewan", where
#     the midpoint is floor((low + high) / 2).
#   * Otherwise the two fields are treated as an intersection:
#     "<street 1> and <street 2>, <community>, Saskatchewan".
gen_lookup <- function(df) {
  s1 <- df[[1]]
  s2 <- df[[2]]
  comm <- df[[3]]

  # Only a value that is entirely "<digits>-<digits>" (optional surrounding
  # whitespace) counts as a range. A bare check for "-" would also catch
  # hyphenated street names and turn them into "NA <street>, ...".
  range_pattern <- "^[[:space:]]*([0-9]+)[[:space:]]*-[[:space:]]*([0-9]+)[[:space:]]*$"

  if (grepl(range_pattern, s2)) {
    num1 <- as.numeric(sub(range_pattern, "\\1", s2))
    num2 <- as.numeric(sub(range_pattern, "\\2", s2))
    mid <- floor((num1 + num2) / 2)
    # sprintf("%.0f") keeps large values in plain digits; pasting the double
    # directly would render e.g. 100000 as "1e+05".
    lookup <- paste0(sprintf("%.0f", mid), " ", s1, ", ", comm, ", Saskatchewan")
  } else {
    lookup <- paste0(s1, " and ", s2, ", ", comm, ", Saskatchewan")
  }

  lookup
}
# Fail early with a clear message when the filter matched nothing; the
# steps below assume there is at least one row to geocode.
if (nrow(reg) == 0) {
  stop("No rows to geocode in `reg`.", call. = FALSE)
}

# Build one lookup string per row. vapply() guarantees a character vector
# with exactly one string per row, whereas sapply() may change its return
# type depending on the input.
street_cols <- c("USTREET1", "USTREET2", "COMMNAME")
reg$lookup_str <- vapply(
  seq_len(nrow(reg)),
  function(i) {
    # Pass the three fields as character, in the positional order
    # gen_lookup() reads them.
    gen_lookup(lapply(reg[i, street_cols], as.character))
  },
  character(1)
)

# Geocode each lookup string, printing it first so progress is visible.
ll <- lapply(
  reg$lookup_str,
  function(x) {
    print(x)
    geocode(x)
  }
)

# Stack the per-address results and copy the coordinates onto `reg`.
# Columns are selected by name, so their order in `dd` does not matter.
dd <- rbind.fill(ll)
reg[, c("lat", "lon")] <- dd[, c("lat", "lon")]

# Make sure the output directory exists so the geocoded results are not
# lost to a write failure after all the lookups have been made.
dir.create("./data", showWarnings = FALSE, recursive = TRUE)
write.csv(reg, file = "./data/regina.csv", row.names = FALSE)

# Share of rows that came back with a non-missing latitude.
hit_pct <- 1 - mean(is.na(reg$lat))
print(hit_pct)