Skip to content

Commit

Permalink
update standings -> 2025-02-25 10:00:00
Browse files Browse the repository at this point in the history
  • Loading branch information
KittJonathan committed Feb 25, 2025
1 parent c563ecc commit d7db74b
Show file tree
Hide file tree
Showing 15 changed files with 8,523 additions and 23,730 deletions.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
106 changes: 101 additions & 5 deletions 02-SCRIPTS/clean_data.R
Original file line number Diff line number Diff line change
Expand Up @@ -74,14 +74,104 @@ all_data <- all_data |>

# Export the dataset

write_csv(x = all_data, file = "03-DATA_PROCESSED/standings_to_20250114060000.csv")
write_csv(x = all_data, file = "03-DATA_PROCESSED/standings_to_20250224_220000.csv")

# ⬇️ Get and clean data since Dalin's arrival -------------------------
# ⬇️ Get and clean data from Dalin's arrival to 2025-02-24 ----------------

# Richomme : 2025_01_15 07:21:02


# Dalin : 2025_01_14 08:24:49 (paths[1] -> paths[383])
# Richomme : 2025_01_15 07:21:02 (paths[384] -> )
# 2025-02-24 : paths[626]


# List the raw leaderboard workbooks. `pattern` is a regular expression (not a
# glob), so the extension is matched literally and anchored to the end of the
# file name.
paths <- list.files(
  path = "01-DATA_RAW/",
  full.names = TRUE,
  pattern = "\\.xlsx$"
)

# Keep positions 384 to 626 of the listing. These hard-coded positions rely on
# the alphabetical order returned by list.files().
paths <- paths[384:626]

# One row per workbook: its path plus the date and time stamps taken from the
# file name (vendeeglobe_leaderboard_<date>_<time>.xlsx), sorted by date then
# time. basename() is used instead of splitting on "/" so the result does not
# depend on how many separators the listed path contains.
all_files <- tibble(path = paths) |>
  mutate(
    date_time = basename(path) |>
      str_remove_all(pattern = fixed("vendeeglobe_leaderboard_")) |>
      str_remove_all(pattern = fixed(".xlsx"))
  ) |>
  separate(col = date_time, into = c("date", "time"), sep = "_") |>
  arrange(date, time)

# all_files |>
# mutate(datetime = as_datetime(date, time))
# mutate(sheet_range = case_when(date <= 20250113 ~ "B6:U45",
# .default = NA))

# Read every leaderboard workbook listed in `all_files` and stack the results
# into a single table.

# Values of the first column that identify a standings row: a rank from 1 to
# 40, or the marker "RET". c() coerces the numbers to character.
ok_val <- c(1:40, "RET")

# Names for the 20 columns of the B:U range, in sheet order
standings_cols <- c(
  "rank", "nat_sail", "skipper_boat", "hour_FR", "lat_deg", "lon_deg",
  "last30min_heading_deg", "last30min_speed_kts", "last30min_VMG_kts",
  "last30min_distance_nm",
  "since_last_standings_heading_deg", "since_last_standings_speed_kts",
  "since_last_standings_VMG_kts", "since_last_standings_distance_nm",
  "last24hrs_heading_deg", "last24hrs_speed_kts",
  "last24hrs_VMG_kts", "last24hrs_distance_nm",
  "distance_to_finish_nm", "distance_to_leader_nm"
)

# One slot per file, allocated up front
all_data <- vector("list", nrow(all_files))

# seq_len() yields an empty loop when no file is listed, whereas 1:0 would
# iterate over c(1, 0) and fail on a missing path.
for (i in seq_len(nrow(all_files))) {

  df <- readxl::read_xlsx(all_files$path[i],
                          range = "B6:U47", col_names = FALSE) |>
    filter(...1 %in% ok_val)

  names(df) <- standings_cols

  all_data[[i]] <- df |>
    # The first 3 characters of nat_sail go to `nat`, the remainder to `sail`
    mutate(nat = str_extract(string = nat_sail, pattern = "^.{3}"),
           sail = str_remove(string = nat_sail, pattern = "^.{3}"),
           .after = nat_sail) |>
    # No `by` is given, so the join uses the columns shared with `skippers`
    # NOTE(review): pin the key with `by =` once the intended columns are
    # confirmed
    left_join(skippers) |>
    select(rank, nat, sail, surname, name, hour_FR:distance_to_leader_nm) |>
    # Tag every row with the date and time stamps of its source file
    mutate(date = all_files$date[i],
           time = all_files$time[i],
           .before = rank)

}

all_data <- bind_rows(all_data)

# Step 1: build a single timestamp from the date and time stamps (which are
# dropped afterwards), remove the hour column and normalise one sail number.
all_data <- all_data |>
  mutate(datetime = as_datetime(paste0(date, time)),
         .before = date, .keep = "unused") |>
  select(-hour_FR) |>
  mutate(sail = case_when(sail == "FRA85" ~ "FRA 85",
                          .default = sail))

# Step 2: strip the unit suffixes, then convert the measurements to numeric
# and the identifying columns to factors.
all_data <- all_data |>
  mutate(
    across(ends_with("heading_deg"),
           \(x) str_remove_all(string = x, pattern = "°")),
    across(ends_with(c("speed_kts", "VMG_kts")),
           \(x) str_remove_all(string = x, pattern = " kts")),
    across(contains("distance_"),
           \(x) str_remove_all(string = x, pattern = " nm")),
    across(last30min_heading_deg:distance_to_leader_nm, as.numeric),
    across(rank:sail, as_factor)
  )

# Step 3: parse the coordinate strings into numeric latitude / longitude,
# placed right after their source columns; rows whose source string is
# missing are set to NA.
all_data <- all_data |>
  mutate(lat = parzer::parse_lat(lat_deg), .after = lat_deg) |>
  mutate(lon = parzer::parse_lon(lon_deg), .after = lon_deg) |>
  mutate(
    lat = if_else(is.na(lat_deg), NA, lat),
    lon = if_else(is.na(lon_deg), NA, lon)
  )

# Load the previously exported standings
d1 <- read_csv(file = "03-DATA_PROCESSED/standings_to_20250114060000.csv")

# Stack the earlier standings on top of the newly cleaned ones
full_data <- d1 |>
  bind_rows(all_data)

# Save the combined table

full_data |>
  write_csv(file = "03-DATA_PROCESSED/standings_to_20250224_220000.csv")

# ⬇️ Add latest standings -------------------------------------------------

paths <- list.files(path = "01-DATA_RAW/", full.names = T, pattern = "*.xlsx")
paths <- paths[384:length(paths)]

# 2025-02-24 : paths[626]

paths <- paths[627:length(paths)]

all_files <- tibble(path = paths) |>
separate(col = path, into = c("dir", "file"), sep = "/", remove = FALSE) |>
Expand Down Expand Up @@ -150,6 +240,12 @@ all_data <- all_data |>
mutate(lon = case_when(is.na(lon_deg) ~ NA,
.default = lon))

# Read existing standings dataset (the export written under the
# "standings_to_20250224_220000" name)
d1 <- read_csv("03-DATA_PROCESSED/standings_to_20250224_220000.csv")

# Combine both datasets: existing rows first, newly cleaned rows after
full_data <- bind_rows(d1, all_data)

# Export the dataset
# NOTE(review): two exports appear here, `all_data` alone and the combined
# `full_data`; they look like the previous and current versions of the same
# statement, so confirm that only the intended one remains.

write_csv(x = all_data, file = "03-DATA_PROCESSED/standings_since_20250114100000.csv")
write_csv(x = full_data, file = "03-DATA_PROCESSED/standings_to_20250225_100000.csv")
12 changes: 6 additions & 6 deletions 02-SCRIPTS/get_standings.R
Original file line number Diff line number Diff line change
Expand Up @@ -50,12 +50,12 @@ get_standings <- function(date = NULL, time = NULL, from = NULL, to = NULL, dir)
}
}

# Request individual standings by date and time stamp, targeting the raw-data
# folder (presumably one file per call; confirm against get_standings()).
# NOTE(review): two dates appear below (20250224 and 20250225) with the same
# six time stamps; they look like the previous and current versions of the
# same calls, so confirm which set should remain.
get_standings(date = "20250224", time = "020000", dir = "01-DATA_RAW/")
get_standings(date = "20250224", time = "060000", dir = "01-DATA_RAW/")
get_standings(date = "20250224", time = "100000", dir = "01-DATA_RAW/")
get_standings(date = "20250224", time = "140000", dir = "01-DATA_RAW/")
get_standings(date = "20250224", time = "180000", dir = "01-DATA_RAW/")
get_standings(date = "20250224", time = "220000", dir = "01-DATA_RAW/")
get_standings(date = "20250225", time = "020000", dir = "01-DATA_RAW/")
get_standings(date = "20250225", time = "060000", dir = "01-DATA_RAW/")
get_standings(date = "20250225", time = "100000", dir = "01-DATA_RAW/")
get_standings(date = "20250225", time = "140000", dir = "01-DATA_RAW/")
get_standings(date = "20250225", time = "180000", dir = "01-DATA_RAW/")
get_standings(date = "20250225", time = "220000", dir = "01-DATA_RAW/")

# Same function called with a date range (`from` / `to`) instead of a single
# date and time
get_standings(from = "20250221", to = "20250223", dir = "01-DATA_RAW/")

Expand Down
31 changes: 31 additions & 0 deletions 02-SCRIPTS/gt_standings.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
# 📦 Load packages --------------------------------------------------------

library(tidyverse)
library(glue)
library(gt)

# ⬇️ Import the data ------------------------------------------------------


# Processed standings table
standings <- read_csv(file = "03-DATA_PROCESSED/standings_to_20250225_100000.csv")

# Arrivals table
arrivals <- read_csv(file = "04-RESULTS/arrivals.csv")

# Withdrawals table
withdrawals <- read_csv(file = "04-RESULTS/withdrawals.csv")

# 🧹 Clean the data -------------------------------------------------------

# Render a table of race times from the arrivals data.
# `duration` is split into four pieces using separate()'s default separator
# (runs of non-alphanumeric characters), and parse_number() keeps the numeric
# part of each piece. The original column is not needed afterwards: only
# `surname` and the formatted `race_time` reach the table, so no separate
# duration conversion is performed.
arrivals |>
  separate(col = duration,
           into = c("days", "hours", "minutes", "seconds")) |>
  mutate(across(.cols = days:seconds,
                .fns = parse_number)) |>
  mutate(race_time = glue("{days} days {hours} hours {minutes} minutes {seconds} seconds")) |>
  select(surname, race_time) |>
  gt()

# Standings from the 2025-02-25 10:00:00 timestamp onwards, reduced to the
# skipper and the remaining distance. The cutoff is an explicit UTC instant:
# compared against a bare string, R would convert the string in the session's
# local time zone, which shifts the cutoff on machines that are not set to UTC.
# NOTE(review): assumes `datetime` was parsed as a UTC date-time when the CSV
# was read — confirm.
standings |>
  filter(datetime >= as.POSIXct("2025-02-25 10:00:00", tz = "UTC")) |>
  # Drops every row that has a missing value in any column
  drop_na() |>
  select(surname, name, distance_to_finish_nm)
4 changes: 2 additions & 2 deletions 02-SCRIPTS/passage_caps.R
Original file line number Diff line number Diff line change
Expand Up @@ -276,7 +276,7 @@ write_csv(x = arrivals, file = "04-RESULTS/arrivals.csv")

# Abandons

retired <- tribble(
withdrawals <- tribble(
~surname, ~datetime,
"Sorel", "2024-11-15 13:25:00",
"Burton", "2024-12-05 05:00:00",
Expand All @@ -287,4 +287,4 @@ retired <- tribble(
"Boissieres", "2025-02-02 14:00:00"
)

write_csv(x = retired, file = "04-RESULTS/retired.csv")
write_csv(x = withdrawals, file = "04-RESULTS/withdrawals.csv")
Loading

0 comments on commit d7db74b

Please sign in to comment.