From e0510fdbe3124ff518f5f6cb40391c74bb7a3e14 Mon Sep 17 00:00:00 2001 From: Tony ElHabr Date: Thu, 24 Aug 2023 13:09:05 -0500 Subject: [PATCH] all the code deletion --- .../automated_fotmob_match_details.yml | 28 ----- .../automated_fotmob_matches_by_date.yml | 28 ----- .../backfill_fotmob_match_details.R | 117 ------------------ .../update_fotmob_match_details.R | 91 -------------- .../backfill_fotmob_matches_by_date.R | 55 -------- .../update_fotmob_matches_by_date.R | 72 ----------- raw-data/fotmob-leagues/get_fotmob_leagues.R | 9 -- 7 files changed, 400 deletions(-) delete mode 100644 .github/workflows/automated_fotmob_match_details.yml delete mode 100644 .github/workflows/automated_fotmob_matches_by_date.yml delete mode 100644 R/fotmob_match_details/backfill_fotmob_match_details.R delete mode 100644 R/fotmob_match_details/update_fotmob_match_details.R delete mode 100644 R/fotmob_matches_by_date/backfill_fotmob_matches_by_date.R delete mode 100644 R/fotmob_matches_by_date/update_fotmob_matches_by_date.R delete mode 100644 raw-data/fotmob-leagues/get_fotmob_leagues.R diff --git a/.github/workflows/automated_fotmob_match_details.yml b/.github/workflows/automated_fotmob_match_details.yml deleted file mode 100644 index 46838517..00000000 --- a/.github/workflows/automated_fotmob_match_details.yml +++ /dev/null @@ -1,28 +0,0 @@ -name: Scrape fotmob match details - -# At 23:31 daily. -on: - schedule: - - cron: "31 23 * * *" - -jobs: - update-fotmob-match-details: - runs-on: macOS-latest - env: - GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }} - steps: - - uses: actions/checkout@v2 - - uses: r-lib/actions/setup-r@v2 - - name: Package Installation - run: Rscript -e 'install.packages(c("devtools", "dplyr", "purrr", "tidyr", "janitor", "readr", "piggyback"))' - - name: worldfootballR Package Installation - run: Rscript -e 'devtools::install_github("JaseZiv/worldfootballR")' - - name: Update match results - run: Rscript -e 'source(file.path("R", "fotmob_match_details", "update_fotmob_match_details.R"), echo = TRUE)' - - name: Commit - run: | - git config --global user.name 'JaseZiv' - git config --global user.email 'jaseziv83@gmail.com' - git add . - git commit -m 'updating fotmob match details' || echo "No changes to commit" - git push || echo "No changes to commit" diff --git a/.github/workflows/automated_fotmob_matches_by_date.yml b/.github/workflows/automated_fotmob_matches_by_date.yml deleted file mode 100644 index a4a4cf23..00000000 --- a/.github/workflows/automated_fotmob_matches_by_date.yml +++ /dev/null @@ -1,28 +0,0 @@ -name: Scrape fotmob matches by date - -# At 23:01 daily. -on: - schedule: - - cron: "1 23 * * *" - -jobs: - update-fotmob-matches-by-date: - runs-on: macOS-latest - env: - GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }} - steps: - - uses: actions/checkout@v2 - - uses: r-lib/actions/setup-r@v2 - - name: Package Installation - run: Rscript -e 'install.packages(c("devtools", "dplyr", "purrr", "tidyr", "lubridate", "janitor", "readr", "piggyback"))' - - name: worldfootballR Package Installation - run: Rscript -e 'devtools::install_github("JaseZiv/worldfootballR")' - - name: Update match results - run: Rscript -e 'source(file.path("R", "fotmob_matches_by_date", "update_fotmob_matches_by_date.R"), echo = TRUE)' - - name: Commit - run: | - git config --global user.name 'JaseZiv' - git config --global user.email 'jaseziv83@gmail.com' - git add . - git commit -m 'updating fotmob matches by date' || echo "No changes to commit" - git push || echo "No changes to commit" diff --git a/R/fotmob_match_details/backfill_fotmob_match_details.R b/R/fotmob_match_details/backfill_fotmob_match_details.R deleted file mode 100644 index b77c6bb4..00000000 --- a/R/fotmob_match_details/backfill_fotmob_match_details.R +++ /dev/null @@ -1,117 +0,0 @@ -library(worldfootballR) -library(dplyr) -library(purrr) -library(tidyr) -library(janitor) -library(readr) - -source("R/piggyback.R") -matches_by_date <- read_worldfootballr_rds("matches_by_date", tag = "fotmob_matches_by_date") - -data_dir <- file.path("data", "fotmob_match_details") -subdata_dir <- file.path(data_dir, "matches") -dir.create(data_dir, showWarnings = FALSE) -dir.create(subdata_dir, showWarnings = FALSE) - -scrape_fotmob_match_details <- function(match_id, overwrite = FALSE) { - rds_path <- file.path(subdata_dir, sprintf("%s.rds", match_id)) - if (file.exists(rds_path) & !overwrite) { - message(sprintf("Returning pre-saved data for %s.", match_id)) - return(read_rds(rds_path)) - } - Sys.sleep(1) - message(sprintf("Scraping matches for %s.", match_id)) - match_details <- fotmob_get_match_details(match_id) - write_rds(match_details, rds_path) - match_details -} - -# scrape_fotmob_match_details <- function(match_id, overwrite = FALSE) { -# Sys.sleep(1) -# message(sprintf("Scraping matches for %s.", match_id)) -# fotmob_get_match_details(match_id) -# } -possibly_scrape_fotmob_match_details <- possibly(scrape_fotmob_match_details, otherwise = tibble(), quiet = FALSE) - -league_id_mapping <- tibble( - league_id = c(48,110,146, 86,140,8972), - country = c('ENG', 'FRA', 'GER', 'ITA', 'ESP', 'USA'), - tier = rep('2nd', 6) -) - -league_start_dates <- league_id_mapping |> - mutate( - data = map2( - country, tier, - ~load_match_results( - country = ..1, - gender = "M", - season_end_year = 2021, - tier = ..2 - ) - ) - ) |> - unnest(data) |> - group_by(league_id) |> - slice_min(Date, n = 1, with_ties = FALSE) |> - select(league_id, date = Date) - -other_league_ids <- 44 - -league_start_date_mapping <- setNames(league_start_dates$date, league_start_dates$league_id) - -scrape_fotmob_match_details_for_league <- function(league_id) { - first_date <- if (league_id %in% other_league_ids) { - as.Date("2020-06-01") - } else { - as.Date(league_start_date_mapping[[as.character(league_id)]]) - } - - rds_path <- file.path(data_dir, sprintf("%s_match_details.rds", league_id)) - path_exists <- file.exists(rds_path) - csv_path <- file.path(data_dir, sprintf("%s_match_details.csv", league_id)) - - if (isTRUE(path_exists)) { - existing_match_details <- read_rds(rds_path) - existing_match_ids <- unique(existing_match_details$match_id) - existing_matches_by_date <- matches_by_date |> - filter(!(match_id %in% existing_match_ids)) - } else { - existing_match_details <- tibble() - existing_match_ids <- integer() - existing_matches_by_date <- matches_by_date - } - - new_matches_by_date <- existing_matches_by_date |> - filter(primary_id == !!league_id) |> - filter(date >= !!first_date) |> - filter(!(match_id %in% existing_match_ids)) |> - filter(!match_status_cancelled, match_status_finished) - - if (nrow(new_matches_by_date) == 0) { - message(sprintf("Not updating data for `league_id = %s`.", league_id)) - return(existing_match_details) - } - - new_match_details <- new_matches_by_date$match_id[1:2] |> - map_dfr(scrape_fotmob_match_details) - - if (nrow(new_match_details) == 0) { - message(sprintf("Not updating data for `league_id = %s`. Bad matches: %s", league_id, nrow(new_matches_by_date))) - return(existing_match_details) - } - - match_details <- bind_rows( - existing_match_details, - new_match_details - ) - - attr(match_details, "scrape_timestamp") <- scrape_time_utc - write_rds(match_details, rds_path) - write_csv(match_details, csv_path, na = "") - - match_details -} - -as.character(c(league_id_mapping$league_id, other_league_ids)) |> - walk(scrape_fotmob_match_details_for_league) diff --git a/R/fotmob_match_details/update_fotmob_match_details.R b/R/fotmob_match_details/update_fotmob_match_details.R deleted file mode 100644 index 8c1f3552..00000000 --- a/R/fotmob_match_details/update_fotmob_match_details.R +++ /dev/null @@ -1,91 +0,0 @@ -library(worldfootballR) -library(dplyr) -library(purrr) -library(tidyr) -library(janitor) -library(readr) - -source("R/piggyback.R") -matches_by_date <- read_worldfootballr_rds("matches_by_date", tag = "fotmob_matches_by_date") - -scrape_fotmob_match_details <- function(match_id) { - Sys.sleep(1) - message(sprintf("Scraping matches for %s.", match_id)) - fotmob_get_match_details(match_id) -} -possibly_scrape_fotmob_match_details <- possibly(scrape_fotmob_match_details, otherwise = tibble(), quiet = FALSE) - -league_id_mapping <- c( - "47" = "ENG", - "53" = "FRA", - "54" = "GER", - "55" = "ITA", - "87" = "ESP", - "130" = "USA" -) -other_league_ids <- as.character(c(50, 42, 73)) - -league_start_dates <- league_id_mapping |> - imap_dfr( - ~load_match_results( - country = .x, - gender = "M", - season_end_year = 2021, - tier = "1st" - ) |> - slice_min(Date, n = 1, with_ties = FALSE) |> - select(date = Date) |> - mutate(league_id = .y, .before = 1) - ) -league_start_date_mapping <- setNames(league_start_dates$date, league_start_dates$league_id) - -scrape_fotmob_match_details_for_league <- function(league_id) { - first_date <- if (league_id %in% other_league_ids) { - as.Date("2020-06-01") - } else { - as.Date(league_start_date_mapping[[league_id]]) - } - - existing_match_details <- read_worldfootballr_rds( - sprintf("%s_match_details", league_id), - tag = "fotmob_match_details" - ) - existing_match_ids <- unique(existing_match_details$match_id) - - new_matches_by_date <- matches_by_date |> - filter(primary_id == !!league_id) |> - filter(date >= !!first_date) |> - filter(!(match_id %in% existing_match_ids)) |> - filter(!match_status_cancelled, match_status_finished) - - if (nrow(new_matches_by_date) == 0) { - message(sprintf("Not updating data for `league_id = %s`.", league_id)) - return(existing_match_details) - } - - scrape_time_utc <- as.POSIXlt(Sys.time(), tz = "UTC") - - new_match_details <- new_matches_by_date$match_id |> - map_dfr(possibly_scrape_fotmob_match_details) - - if (nrow(new_match_details) == 0) { - message(sprintf("Not updating data for `league_id = %s`. Bad matches: %s", league_id, nrow(new_matches_by_date))) - return(existing_match_details) - } - - match_details <- bind_rows( - existing_match_details, - new_match_details - ) - - attr(match_details, "scrape_timestamp") <- scrape_time_utc - write_worldfootballr_rds_and_csv( - x = match_details, - name = sprintf("%s_match_details", league_id), - tag = "fotmob_match_details" - ) - match_details -} - -c(names(league_id_mapping), other_league_ids) |> - walk(scrape_fotmob_match_details_for_league) diff --git a/R/fotmob_matches_by_date/backfill_fotmob_matches_by_date.R b/R/fotmob_matches_by_date/backfill_fotmob_matches_by_date.R deleted file mode 100644 index b4a333fe..00000000 --- a/R/fotmob_matches_by_date/backfill_fotmob_matches_by_date.R +++ /dev/null @@ -1,55 +0,0 @@ -library(worldfootballR) -library(dplyr) -library(tidyr) -library(purrr) -library(lubridate) -library(janitor) -library(readr) - -source("R/piggyback.R") - -scrape_time_utc <- as.POSIXlt(Sys.time(), tz = "UTC") -yesterday <- Sys.Date() - 1 - -dates <- seq.Date(as.Date("2017-08-11"), yesterday, by = "day") - -scrape_fotmob_matches_on_date <- function(date, overwrite = FALSE) { - Sys.sleep(1) - message(sprintf("Scraping matches for %s.", date)) - fotmob_get_matches_by_date(date) -} - -possibly_scrape_fotmob_matches_on_date <- possibly(scrape_fotmob_matches_on_date, otherwise = tibble(), quiet = FALSE) - -matches_by_date <- dates |> - set_names() |> - map_dfr( - possibly_scrape_fotmob_matches_on_date, - .id = "date" - ) - -ids_to_backfill <- c(48, 110, 146, 86, 140, 8972) -walk( - ids_to_backfill, - ~{ - league_matches_by_date <- all_matches_by_date |> filter(primary_id == .x) - if (nrow(league_matches_by_date) == 0) { - message("No matches.") - return(invisible()) - } - attr(.x, "scrape_timestamp") <- scrape_time_utc - write_worldfootballr_rds_and_csv( - x = league_matches_by_date, - name = sprintf("%s_matches_by_date", .x), - tag = "fotmob_matches_by_date" - ) - } -) - -# attr(matches_by_date, "scrape_timestamp") <- scrape_time_utc -# write_worldfootballr( -# matches_by_date, -# name = "matches_by_date", -# tag = "fotmob_matches_by_date", -# ext = "rds" -# ) \ No newline at end of file diff --git a/R/fotmob_matches_by_date/update_fotmob_matches_by_date.R b/R/fotmob_matches_by_date/update_fotmob_matches_by_date.R deleted file mode 100644 index 202bcf21..00000000 --- a/R/fotmob_matches_by_date/update_fotmob_matches_by_date.R +++ /dev/null @@ -1,72 +0,0 @@ -library(worldfootballR) -library(dplyr) -library(tidyr) -library(purrr) -library(lubridate) -library(janitor) -library(readr) - -source("R/piggyback.R") - -scrape_time_utc <- as.POSIXlt(Sys.time(), tz = "UTC") -yesterday <- Sys.Date() - 1 - -dates <- seq.Date(as.Date("2017-08-11"), yesterday, by = "day") -existing_matches_by_date <- read_worldfootballr_rds("matches_by_date", tag = "fotmob_matches_by_date") -existing_dates <- existing_matches_by_date |> - distinct(date) |> - pull(date) |> - ymd() -new_dates <- as.Date(setdiff(dates, existing_dates), origin = "1970-01-01") - -if (length(new_dates) == 0) { - message("Skipping since there are no new dates.") -} else { - - scrape_fotmob_matches_on_date <- function(date, overwrite = FALSE) { - Sys.sleep(1) - message(sprintf("Scraping matches for %s.", date)) - fotmob_get_matches_by_date(date) - } - possibly_scrape_fotmob_matches_on_date <- possibly(scrape_fotmob_matches_on_date, otherwise = tibble(), quiet = FALSE) - - new_matches_by_date <- new_dates |> - set_names() |> - map_dfr( - possibly_scrape_fotmob_matches_on_date, - .id = "date" - ) - - matches_by_date <- bind_rows( - existing_matches_by_date, - new_matches_by_date - ) |> - distinct() - - popular_league_ids <- c(50, 42, 44, 73, 47, 54, 87, 53, 130, 55) - tier2_big5_and_mls_ids <- c(48, 110, 146, 86, 140, 8972) - all_league_ids <- c(popular_league_ids, tier2_big5_and_mls_ids) - walk( - all_league_ids, - ~{ - new_league_matches_by_date <- new_matches_by_date |> filter(primary_id == .x) - if (nrow(new_league_matches_by_date) == 0) { - return(invisible()) - } - league_matches_by_date <- matches_by_date |> filter(primary_id == .x) - attr(.x, "scrape_timestamp") <- scrape_time_utc - write_worldfootballr_rds_and_csv( - x = league_matches_by_date, - name = sprintf("%s_matches_by_date", .x), - tag = "fotmob_matches_by_date" - ) - } - ) - - attr(matches_by_date, "scrape_timestamp") <- scrape_time_utc - write_worldfootballr_rds_and_csv( - matches_by_date, - name = "matches_by_date", - tag = "fotmob_matches_by_date" - ) -} diff --git a/raw-data/fotmob-leagues/get_fotmob_leagues.R b/raw-data/fotmob-leagues/get_fotmob_leagues.R deleted file mode 100644 index 4aa02e96..00000000 --- a/raw-data/fotmob-leagues/get_fotmob_leagues.R +++ /dev/null @@ -1,9 +0,0 @@ -library(here) -library(worldfootballR) -all_leagues <- worldfootballR::fotmob_get_league_ids(cached = FALSE) - -write.csv( - all_leagues, - here::here("raw-data", "fotmob-leagues", "all_leagues.csv"), - row.names = FALSE -)