From b747385ecdcbdf11b289fe72befdeac6265b29b4 Mon Sep 17 00:00:00 2001 From: aymennasri Date: Tue, 28 Jan 2025 01:34:45 +0100 Subject: [PATCH 1/9] Fixed redundancy. --- R/tm_league_injuries.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/tm_league_injuries.R b/R/tm_league_injuries.R index a9164730..77eeb938 100644 --- a/R/tm_league_injuries.R +++ b/R/tm_league_injuries.R @@ -1,6 +1,6 @@ #' Get league injuries #' -#' Returns a data frame of all currently injured players players for a selected league +#' Returns a data frame of all currently injured players for a selected league #' #' @param country_name the country of the league's players #' @param league_url league url from transfermarkt.com. To be used when country_name not available in main function From 6210803c62a18574fd1370fb502c4797a528ed92 Mon Sep 17 00:00:00 2001 From: aymennasri Date: Tue, 28 Jan 2025 02:33:32 +0100 Subject: [PATCH 2/9] Added a half baked solution to #411, get_risk_of_suspension() with a url is the only thing working. --- DESCRIPTION | 2 +- NAMESPACE | 2 + R/tm_league_suspensions.R | 218 ++++++++++++++++++++++++++++++++++ man/get_risk_of_suspension.Rd | 19 +++ man/get_suspensions.Rd | 19 +++ man/tm_league_injuries.Rd | 2 +- 6 files changed, 260 insertions(+), 2 deletions(-) create mode 100644 R/tm_league_suspensions.R create mode 100644 man/get_risk_of_suspension.Rd create mode 100644 man/get_suspensions.Rd diff --git a/DESCRIPTION b/DESCRIPTION index b1fe69b4..12d5dc1f 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -54,4 +54,4 @@ Suggests: rmarkdown, testthat Encoding: UTF-8 -RoxygenNote: 7.2.3 +RoxygenNote: 7.3.2 diff --git a/NAMESPACE b/NAMESPACE index 78dabe42..7c959c6d 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -32,7 +32,9 @@ export(get_match_shooting) export(get_match_summary) export(get_match_urls) export(get_player_market_values) +export(get_risk_of_suspension) export(get_season_team_stats) +export(get_suspensions) export(get_team_match_results) export(load_fb_advanced_match_stats) export(load_fb_big5_advanced_season_stats) diff --git a/R/tm_league_suspensions.R b/R/tm_league_suspensions.R new file mode 100644 index 00000000..ce017845 --- /dev/null +++ b/R/tm_league_suspensions.R @@ -0,0 +1,218 @@ +#' Get League Suspensions +#' +#' Returns a data frame of all suspended players in a selected league +#' +#' @param country_name The country of the league (used if league_url not provided) +#' @param league_url Optional direct league URL from transfermarkt.com +#' @return A dataframe of suspended players +#' @importFrom magrittr %>% +#' @importFrom rlang .data +#' @export + +get_suspensions <- function(country_name = NA, league_url = NA) { + main_url <- "https://www.transfermarkt.com" + + .replace_empty_na <- function(x) { + ifelse(is.na(x) | x == "", NA_character_, x) + } + + if (is.na(league_url) && is.na(country_name)) { + stop("You must provide either league_url or country_name") + } + + if (is.na(league_url)) { + tryCatch({ + meta_df <- utils::read.csv(url("https://raw.githubusercontent.com/JaseZiv/worldfootballR_data/master/raw-data/transfermarkt_leagues/main_comp_seasons.csv"), + stringsAsFactors = FALSE) + meta_filtered <- meta_df %>% dplyr::filter(.data[["country"]] == country_name) + + if (nrow(meta_filtered) == 0) { + stop(glue::glue("Country {country_name} not found in metadata")) + } + + comp_url <- meta_filtered$comp_url[1] + comp_name <- meta_filtered$comp_name[1] + country <- country_name + }, error = function(e) { + stop("Failed to retrieve league metadata") + }) + } else { + tryCatch({ + league_page <- xml2::read_html(league_url) + comp_url <- league_url + comp_name <- league_page %>% + rvest::html_node("h1.data-header__headline") %>% + rvest::html_text() %>% + stringr::str_squish() + country <- league_page %>% + rvest::html_nodes(".data-header img") %>% + rvest::html_attr("alt") %>% + .[1] %>% + stringr::str_squish() + }, error = function(e) { + stop(glue::glue("Invalid league URL: {league_url}")) + }) + } + + suspensions_url <- comp_url %>% + gsub("startseite", "sperrenausfaelle", .) %>% + paste0(., "/plus/1") + + tryCatch({ + page <- xml2::read_html(suspensions_url) + }, error = function(e) { + warning(glue::glue("Failed to read suspensions page: {suspensions_url}")) + return(data.frame()) + }) + + suspensions_data <- tryCatch({ + boxes <- page %>% rvest::html_nodes("div.box") + suspensions_table <- boxes[[1]] %>% rvest::html_node("table.items") + + rows <- suspensions_table %>% + rvest::html_nodes("tbody tr") %>% + purrr::keep(~length(rvest::html_node(.x, "table.inline-table")) > 0) + + purrr::map_df(rows, ~{ + player_info <- .x %>% rvest::html_node("td:first-child table.inline-table") + + .parse_date <- function(date_text) { + if (grepl("\\d{2}\\.\\d{2}\\.\\d{4}", date_text)) { + lubridate::dmy(date_text) + } else { + NA_Date_ + } + } + + tibble::tibble( + Player = tryCatch(player_info %>% rvest::html_node("td:nth-child(2) a") %>% + rvest::html_text(trim = TRUE), error = function(e) NA_character_), + Position = tryCatch(player_info %>% rvest::html_node("tr:nth-child(2) td") %>% + rvest::html_text(trim = TRUE), error = function(e) NA_character_), + Club = tryCatch(.x %>% rvest::html_node("td:nth-child(2) a img") %>% + rvest::html_attr("title"), error = function(e) NA_character_), + Age = tryCatch(.x %>% rvest::html_node("td:nth-child(3)") %>% + rvest::html_text(trim = TRUE) %>% as.numeric(), error = function(e) NA_real_), + Reason = tryCatch(.x %>% rvest::html_node("td:nth-child(4)") %>% + rvest::html_text(trim = TRUE), error = function(e) NA_character_), + Since = tryCatch(.parse_date(.x %>% rvest::html_node("td:nth-child(5)") %>% + rvest::html_text(trim = TRUE)), error = function(e) NA_Date_), + Until = tryCatch(.parse_date(.x %>% rvest::html_node("td:nth-child(6)") %>% + rvest::html_text(trim = TRUE)), error = function(e) NA_Date_), + Matches_Missed = tryCatch(.x %>% rvest::html_node("td:nth-child(7)") %>% + rvest::html_text(trim = TRUE) %>% as.numeric(), error = function(e) NA_real_) + ) + }) %>% + dplyr::mutate(dplyr::across(where(is.character), .replace_empty_na)) + }, error = function(e) { + warning("Failed to extract suspension data") + return(data.frame()) + }) + + if (nrow(suspensions_data) > 0) { + suspensions_data <- suspensions_data %>% + dplyr::mutate( + Country = ifelse(is.na(league_url), country_name, country), + Competition = comp_name, + .before = 1 + ) + } + + return(suspensions_data) +} + +#' Get Risk of Suspension +#' +#' Returns a data frame of players at risk of suspension +#' +#' @inheritParams get_suspensions +#' @return A dataframe of players at risk of suspension +#' @export +get_risk_of_suspension <- function(country_name, league_url = NA) { + main_url <- "https://www.transfermarkt.com" + + .replace_empty_na <- function(x) { + ifelse(is.na(x) | x == "", NA_character_, x) + } + + if(is.na(league_url)) { + tryCatch({ + meta_df <- utils::read.csv(url("https://raw.githubusercontent.com/JaseZiv/worldfootballR_data/master/raw-data/transfermarkt_leagues/main_comp_seasons.csv"), + stringsAsFactors = FALSE) + meta_filtered <- meta_df %>% dplyr::filter(.data[["country"]] == country_name) + comp_url <- meta_filtered$comp_url[1] + comp_name <- meta_filtered$comp_name[1] + }, error = function(e) { + stop("Failed to retrieve league metadata") + }) + } else { + tryCatch({ + league_page <- xml2::read_html(league_url) + comp_url <- league_url + comp_name <- league_page %>% + rvest::html_nodes(".data-header__headline-wrapper--oswald") %>% + rvest::html_text() %>% + stringr::str_squish() + country <- league_page %>% + rvest::html_nodes(".data-header img") %>% + rvest::html_attr("alt") %>% + .[1] %>% + stringr::str_squish() + }, error = function(e) { + stop(glue::glue("Invalid league URL: {league_url}")) + }) + } + + risk_url <- comp_url %>% + gsub("startseite", "krankenhaus", .) %>% + paste0(., "/plus/1") + + tryCatch({ + page <- xml2::read_html(risk_url) + }, error = function(e) { + warning(glue::glue("Failed to read risk page: {risk_url}")) + return(data.frame()) + }) + + risk_data <- tryCatch({ + boxes <- page %>% rvest::html_nodes("div.box") + risk_table <- boxes[[2]] %>% rvest::html_node("table.items") + + rows <- risk_table %>% + rvest::html_nodes("tbody tr") %>% + purrr::keep(~length(rvest::html_node(.x, "table.inline-table")) > 0) + + purrr::map_df(rows, ~{ + player_info <- .x %>% rvest::html_node("td:first-child table.inline-table") + + tibble::tibble( + Player = tryCatch(player_info %>% rvest::html_node("td:nth-child(2) a") %>% + rvest::html_text(trim = TRUE), error = function(e) NA), + Position = tryCatch(player_info %>% rvest::html_node("tr:nth-child(2) td") %>% + rvest::html_text(trim = TRUE), error = function(e) NA), + Club = tryCatch(.x %>% rvest::html_node("td:nth-child(2) a img") %>% + rvest::html_attr("title") %>% trimws(), error = function(e) NA), + Age = tryCatch(.x %>% rvest::html_node("td:nth-child(3)") %>% + rvest::html_text(trim = TRUE) %>% as.numeric(), error = function(e) NA), + Yellow_Cards = tryCatch(.x %>% rvest::html_node("td:nth-child(4)") %>% + rvest::html_text(trim = TRUE) %>% as.numeric(), error = function(e) NA) + ) + }) %>% + dplyr::mutate(dplyr::across(where(is.character), .replace_empty_na)) + }, error = function(e) { + warning("Failed to extract risk data") + return(data.frame()) + }) + + if(is.na(league_url)) { + risk_data %>% + dplyr::mutate(Country = country_name, + Competition = comp_name, + .before = 1) + } else { + risk_data %>% + dplyr::mutate(Country = country, + Competition = comp_name, + .before = 1) + } +} diff --git a/man/get_risk_of_suspension.Rd b/man/get_risk_of_suspension.Rd new file mode 100644 index 00000000..6ab53296 --- /dev/null +++ b/man/get_risk_of_suspension.Rd @@ -0,0 +1,19 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/tm_league_suspensions.R +\name{get_risk_of_suspension} +\alias{get_risk_of_suspension} +\title{Get Risk of Suspension} +\usage{ +get_risk_of_suspension(country_name, league_url = NA) +} +\arguments{ +\item{country_name}{The country of the league (used if league_url not provided)} + +\item{league_url}{Optional direct league URL from transfermarkt.com} +} +\value{ +A dataframe of players at risk of suspension +} +\description{ +Returns a data frame of players at risk of suspension +} diff --git a/man/get_suspensions.Rd b/man/get_suspensions.Rd new file mode 100644 index 00000000..5235b256 --- /dev/null +++ b/man/get_suspensions.Rd @@ -0,0 +1,19 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/tm_league_suspensions.R +\name{get_suspensions} +\alias{get_suspensions} +\title{Get League Suspensions} +\usage{ +get_suspensions(country_name = NA, league_url = NA) +} +\arguments{ +\item{country_name}{The country of the league (used if league_url not provided)} + +\item{league_url}{Optional direct league URL from transfermarkt.com} +} +\value{ +A dataframe of suspended player +} +\description{ +Returns a data frame of all suspended players in a selected league +} diff --git a/man/tm_league_injuries.Rd b/man/tm_league_injuries.Rd index 8341b445..80c51f70 100644 --- a/man/tm_league_injuries.Rd +++ b/man/tm_league_injuries.Rd @@ -15,5 +15,5 @@ tm_league_injuries(country_name, league_url = NA) returns a dataframe of injured players in the selected league } \description{ -Returns a data frame of all currently injured players players for a selected league +Returns a data frame of all currently injured players for a selected league } From 43b2e4826d9f1e6967649e5d916509bfa7bdff44 Mon Sep 17 00:00:00 2001 From: aymennasri Date: Tue, 28 Jan 2025 13:53:05 +0100 Subject: [PATCH 3/9] Fixed the two functions as they now extract data from both URLs and the worldfootballR_data repo. --- R/tm_league_suspensions.R | 54 ++++++++++++++++++++++++++------------- man/get_suspensions.Rd | 2 +- 2 files changed, 37 insertions(+), 19 deletions(-) diff --git a/R/tm_league_suspensions.R b/R/tm_league_suspensions.R index ce017845..e58e5b53 100644 --- a/R/tm_league_suspensions.R +++ b/R/tm_league_suspensions.R @@ -41,7 +41,7 @@ get_suspensions <- function(country_name = NA, league_url = NA) { league_page <- xml2::read_html(league_url) comp_url <- league_url comp_name <- league_page %>% - rvest::html_node("h1.data-header__headline") %>% + rvest::html_nodes(".data-header__headline-wrapper--oswald") %>% rvest::html_text() %>% stringr::str_squish() country <- league_page %>% @@ -85,22 +85,39 @@ get_suspensions <- function(country_name = NA, league_url = NA) { } tibble::tibble( - Player = tryCatch(player_info %>% rvest::html_node("td:nth-child(2) a") %>% - rvest::html_text(trim = TRUE), error = function(e) NA_character_), - Position = tryCatch(player_info %>% rvest::html_node("tr:nth-child(2) td") %>% - rvest::html_text(trim = TRUE), error = function(e) NA_character_), - Club = tryCatch(.x %>% rvest::html_node("td:nth-child(2) a img") %>% - rvest::html_attr("title"), error = function(e) NA_character_), - Age = tryCatch(.x %>% rvest::html_node("td:nth-child(3)") %>% - rvest::html_text(trim = TRUE) %>% as.numeric(), error = function(e) NA_real_), - Reason = tryCatch(.x %>% rvest::html_node("td:nth-child(4)") %>% - rvest::html_text(trim = TRUE), error = function(e) NA_character_), - Since = tryCatch(.parse_date(.x %>% rvest::html_node("td:nth-child(5)") %>% - rvest::html_text(trim = TRUE)), error = function(e) NA_Date_), - Until = tryCatch(.parse_date(.x %>% rvest::html_node("td:nth-child(6)") %>% - rvest::html_text(trim = TRUE)), error = function(e) NA_Date_), - Matches_Missed = tryCatch(.x %>% rvest::html_node("td:nth-child(7)") %>% - rvest::html_text(trim = TRUE) %>% as.numeric(), error = function(e) NA_real_) + Player = tryCatch(player_info %>% + rvest::html_node("td:nth-child(2) a") %>% + rvest::html_text(trim = TRUE), + error = function(e) NA_character_), + Position = tryCatch(player_info %>% + rvest::html_node("tr:nth-child(2) td") %>% + rvest::html_text(trim = TRUE), + error = function(e) NA_character_), + Club = tryCatch(.x %>% + rvest::html_node("td:nth-child(2) a img") %>% + rvest::html_attr("title"), + error = function(e) NA_character_), + Age = tryCatch(.x %>% + rvest::html_node("td:nth-child(3)") %>% + rvest::html_text(trim = TRUE) %>% as.numeric(), + error = function(e) NA_real_), + Reason = tryCatch(.x %>% + rvest::html_node("td:nth-child(4)") %>% + rvest::html_text(trim = TRUE), + error = function(e) NA_character_), + Since = tryCatch(.x %>% + rvest::html_node("td:nth-child(5)") %>% + rvest::html_text(trim = TRUE), + error = function(e) NA_Date_), + Until = tryCatch(.x %>% + rvest::html_node("td:nth-child(6)") %>% + rvest::html_text(trim = TRUE), + error = function(e) NA_Date_), + Matches_Missed = tryCatch(.x %>% + rvest::html_node("td:nth-child(7)") %>% + rvest::html_text(trim = TRUE) %>% + as.numeric(), + error = function(e) NA_real_) ) }) %>% dplyr::mutate(dplyr::across(where(is.character), .replace_empty_na)) @@ -128,6 +145,7 @@ get_suspensions <- function(country_name = NA, league_url = NA) { #' @inheritParams get_suspensions #' @return A dataframe of players at risk of suspension #' @export + get_risk_of_suspension <- function(country_name, league_url = NA) { main_url <- "https://www.transfermarkt.com" @@ -164,7 +182,7 @@ get_risk_of_suspension <- function(country_name, league_url = NA) { } risk_url <- comp_url %>% - gsub("startseite", "krankenhaus", .) %>% + gsub("startseite", "sperrenausfaelle", .) %>% paste0(., "/plus/1") tryCatch({ diff --git a/man/get_suspensions.Rd b/man/get_suspensions.Rd index 5235b256..6312778e 100644 --- a/man/get_suspensions.Rd +++ b/man/get_suspensions.Rd @@ -12,7 +12,7 @@ get_suspensions(country_name = NA, league_url = NA) \item{league_url}{Optional direct league URL from transfermarkt.com} } \value{ -A dataframe of suspended player +A dataframe of suspended players } \description{ Returns a data frame of all suspended players in a selected league From b5392cc460e546ad64a49432465d4d0c895b50d1 Mon Sep 17 00:00:00 2001 From: aymennasri Date: Tue, 28 Jan 2025 14:17:32 +0100 Subject: [PATCH 4/9] Successfully dealt with dates. Should be ready for a merge now. --- R/tm_league_suspensions.R | 29 +++++++++++++---------------- man/get_risk_of_suspension.Rd | 4 ++-- man/get_suspensions.Rd | 2 +- 3 files changed, 16 insertions(+), 19 deletions(-) diff --git a/R/tm_league_suspensions.R b/R/tm_league_suspensions.R index e58e5b53..52ad6c77 100644 --- a/R/tm_league_suspensions.R +++ b/R/tm_league_suspensions.R @@ -4,7 +4,7 @@ #' #' @param country_name The country of the league (used if league_url not provided) #' @param league_url Optional direct league URL from transfermarkt.com -#' @return A dataframe of suspended players +#' @return A data frame of suspended players #' @importFrom magrittr %>% #' @importFrom rlang .data #' @export @@ -76,14 +76,6 @@ get_suspensions <- function(country_name = NA, league_url = NA) { purrr::map_df(rows, ~{ player_info <- .x %>% rvest::html_node("td:first-child table.inline-table") - .parse_date <- function(date_text) { - if (grepl("\\d{2}\\.\\d{2}\\.\\d{4}", date_text)) { - lubridate::dmy(date_text) - } else { - NA_Date_ - } - } - tibble::tibble( Player = tryCatch(player_info %>% rvest::html_node("td:nth-child(2) a") %>% @@ -99,19 +91,24 @@ get_suspensions <- function(country_name = NA, league_url = NA) { error = function(e) NA_character_), Age = tryCatch(.x %>% rvest::html_node("td:nth-child(3)") %>% - rvest::html_text(trim = TRUE) %>% as.numeric(), + rvest::html_text(trim = TRUE) %>% + as.numeric(), error = function(e) NA_real_), Reason = tryCatch(.x %>% rvest::html_node("td:nth-child(4)") %>% rvest::html_text(trim = TRUE), error = function(e) NA_character_), Since = tryCatch(.x %>% - rvest::html_node("td:nth-child(5)") %>% - rvest::html_text(trim = TRUE), + rvest::html_node("td:nth-child(5)") %>% + rvest::html_text(trim = TRUE) %>% + .tm_fix_dates() %>% + as.Date(), error = function(e) NA_Date_), Until = tryCatch(.x %>% - rvest::html_node("td:nth-child(6)") %>% - rvest::html_text(trim = TRUE), + rvest::html_node("td:nth-child(6)") %>% + rvest::html_text(trim = TRUE) %>% + .tm_fix_dates() %>% + as.Date(), error = function(e) NA_Date_), Matches_Missed = tryCatch(.x %>% rvest::html_node("td:nth-child(7)") %>% @@ -138,12 +135,12 @@ get_suspensions <- function(country_name = NA, league_url = NA) { return(suspensions_data) } -#' Get Risk of Suspension +#' Get League Risk of Suspension #' #' Returns a data frame of players at risk of suspension #' #' @inheritParams get_suspensions -#' @return A dataframe of players at risk of suspension +#' @return A data frame of players at risk of suspension #' @export get_risk_of_suspension <- function(country_name, league_url = NA) { diff --git a/man/get_risk_of_suspension.Rd b/man/get_risk_of_suspension.Rd index 6ab53296..90489c30 100644 --- a/man/get_risk_of_suspension.Rd +++ b/man/get_risk_of_suspension.Rd @@ -2,7 +2,7 @@ % Please edit documentation in R/tm_league_suspensions.R \name{get_risk_of_suspension} \alias{get_risk_of_suspension} -\title{Get Risk of Suspension} +\title{Get League Risk of Suspension} \usage{ get_risk_of_suspension(country_name, league_url = NA) } @@ -12,7 +12,7 @@ get_risk_of_suspension(country_name, league_url = NA) \item{league_url}{Optional direct league URL from transfermarkt.com} } \value{ -A dataframe of players at risk of suspension +A data frame of players at risk of suspension } \description{ Returns a data frame of players at risk of suspension diff --git a/man/get_suspensions.Rd b/man/get_suspensions.Rd index 6312778e..dd6fa55c 100644 --- a/man/get_suspensions.Rd +++ b/man/get_suspensions.Rd @@ -12,7 +12,7 @@ get_suspensions(country_name = NA, league_url = NA) \item{league_url}{Optional direct league URL from transfermarkt.com} } \value{ -A dataframe of suspended players +A data frame of suspended players } \description{ Returns a data frame of all suspended players in a selected league From 9314d30ef4ba9cc8860012f5b0eaabeb4660359b Mon Sep 17 00:00:00 2001 From: aymennasri Date: Sat, 1 Feb 2025 15:06:23 +0100 Subject: [PATCH 5/9] Changed the functions names to match the other Transfermarkt functions. --- NAMESPACE | 4 ++-- R/tm_league_suspensions.R | 6 +++--- ...t_risk_of_suspension.Rd => tm_get_risk_of_suspension.Rd} | 6 +++--- man/{get_suspensions.Rd => tm_get_suspensions.Rd} | 6 +++--- 4 files changed, 11 insertions(+), 11 deletions(-) rename man/{get_risk_of_suspension.Rd => tm_get_risk_of_suspension.Rd} (78%) rename man/{get_suspensions.Rd => tm_get_suspensions.Rd} (80%) diff --git a/NAMESPACE b/NAMESPACE index 7c959c6d..8d3fe6b2 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -32,9 +32,7 @@ export(get_match_shooting) export(get_match_summary) export(get_match_urls) export(get_player_market_values) -export(get_risk_of_suspension) export(get_season_team_stats) -export(get_suspensions) export(get_team_match_results) export(load_fb_advanced_match_stats) export(load_fb_big5_advanced_season_stats) @@ -47,6 +45,8 @@ export(player_dictionary_mapping) export(player_transfer_history) export(tm_expiring_contracts) export(tm_get_player_absence) +export(tm_get_risk_of_suspension) +export(tm_get_suspensions) export(tm_league_debutants) export(tm_league_injuries) export(tm_league_team_urls) diff --git a/R/tm_league_suspensions.R b/R/tm_league_suspensions.R index 52ad6c77..d053f496 100644 --- a/R/tm_league_suspensions.R +++ b/R/tm_league_suspensions.R @@ -9,7 +9,7 @@ #' @importFrom rlang .data #' @export -get_suspensions <- function(country_name = NA, league_url = NA) { +tm_get_suspensions <- function(country_name = NA, league_url = NA) { main_url <- "https://www.transfermarkt.com" .replace_empty_na <- function(x) { @@ -139,11 +139,11 @@ get_suspensions <- function(country_name = NA, league_url = NA) { #' #' Returns a data frame of players at risk of suspension #' -#' @inheritParams get_suspensions +#' @inheritParams tm_get_suspensions #' @return A data frame of players at risk of suspension #' @export -get_risk_of_suspension <- function(country_name, league_url = NA) { +tm_get_risk_of_suspension <- function(country_name, league_url = NA) { main_url <- "https://www.transfermarkt.com" .replace_empty_na <- function(x) { diff --git a/man/get_risk_of_suspension.Rd b/man/tm_get_risk_of_suspension.Rd similarity index 78% rename from man/get_risk_of_suspension.Rd rename to man/tm_get_risk_of_suspension.Rd index 90489c30..444dc9bf 100644 --- a/man/get_risk_of_suspension.Rd +++ b/man/tm_get_risk_of_suspension.Rd @@ -1,10 +1,10 @@ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/tm_league_suspensions.R -\name{get_risk_of_suspension} -\alias{get_risk_of_suspension} +\name{tm_get_risk_of_suspension} +\alias{tm_get_risk_of_suspension} \title{Get League Risk of Suspension} \usage{ -get_risk_of_suspension(country_name, league_url = NA) +tm_get_risk_of_suspension(country_name, league_url = NA) } \arguments{ \item{country_name}{The country of the league (used if league_url not provided)} diff --git a/man/get_suspensions.Rd b/man/tm_get_suspensions.Rd similarity index 80% rename from man/get_suspensions.Rd rename to man/tm_get_suspensions.Rd index dd6fa55c..57657ee8 100644 --- a/man/get_suspensions.Rd +++ b/man/tm_get_suspensions.Rd @@ -1,10 +1,10 @@ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/tm_league_suspensions.R -\name{get_suspensions} -\alias{get_suspensions} +\name{tm_get_suspensions} +\alias{tm_get_suspensions} \title{Get League Suspensions} \usage{ -get_suspensions(country_name = NA, league_url = NA) +tm_get_suspensions(country_name = NA, league_url = NA) } \arguments{ \item{country_name}{The country of the league (used if league_url not provided)} From a75064aa103a42eef20fdbe548a92325a1212727 Mon Sep 17 00:00:00 2001 From: aymennasri Date: Sat, 1 Feb 2025 23:10:21 +0100 Subject: [PATCH 6/9] Added unit tests for the two functions. --- tests/testthat/test-transfermarkt.R | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/tests/testthat/test-transfermarkt.R b/tests/testthat/test-transfermarkt.R index acf7fb8e..e6ade09f 100644 --- a/tests/testthat/test-transfermarkt.R +++ b/tests/testthat/test-transfermarkt.R @@ -252,3 +252,22 @@ test_that("tm_get_player_absence() works", { }) +test_that("tm_get_suspensions() works", { + testthat::skip_on_cran() + + player_suspensions <- tm_get_suspensions(league_url = "https://www.transfermarkt.com/jupiler-pro-league/sperrenausfaelle/wettbewerb/BE1") + expect_type(player_suspensions, "list") + expect_equal(ncol(player_suspensions), 10) + expect_false(nrow(player_suspensions) == 0) +}) + + +test_that("tm_get_risk_of_suspensions() works", { + testthat::skip_on_cran() + + player_stats <- tm_get_risk_of_suspension(league_url = "https://www.transfermarkt.com/jupiler-pro-league/sperrenausfaelle/wettbewerb/BE1") + expect_type(player_stats, "list") + expect_equal(ncol(player_stats), 7) + expect_false(nrow(player_stats) == 0) + +}) From 200d6c32cf117fbc80dd0033a6e333752b833832 Mon Sep 17 00:00:00 2001 From: aymennasri Date: Sat, 1 Feb 2025 23:21:16 +0100 Subject: [PATCH 7/9] Increment version number to 0.6.7 --- DESCRIPTION | 2 +- NEWS.md | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index 12d5dc1f..a44491ae 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,7 +1,7 @@ Type: Package Package: worldfootballR Title: Extract and Clean World Football (Soccer) Data -Version: 0.6.6.0001 +Version: 0.6.7 Authors@R: c( person("Jason", "Zivkovic", , "jaseziv83@gmail.com", role = c("aut", "cre", "cph")), person("Tony", "ElHabr", , "anthonyelhabr@gmail.com", role = "ctb"), diff --git a/NEWS.md b/NEWS.md index 6327c6b2..0ef9cc26 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,4 +1,4 @@ -# worldfootballR (development version) +# worldfootballR 0.6.7 ### Bugs From 2887570fdd20e560a279e72047f0f43a4d383a6b Mon Sep 17 00:00:00 2001 From: aymennasri Date: Sat, 1 Feb 2025 23:41:12 +0100 Subject: [PATCH 8/9] Updated the NEWS.md file. --- NEWS.md | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/NEWS.md b/NEWS.md index 0ef9cc26..41dbeadb 100644 --- a/NEWS.md +++ b/NEWS.md @@ -7,6 +7,10 @@ ### Improvements * `tm_expiring_contracts()` returns additional `date_of_birth` column. (0.6.6.0001) [#355](https://github.com/JaseZiv/worldfootballR/issues/397) +* `tm_player_bio()` now includes three new columns. `picture_url` containing the URL of the player's picture from Transfermarkt, `squad_number` containing the current worn squad number of the player and `player_id` that that contains the player ID on Transfermarkt. +* `get_player_info()` added. ([#47](https://github.com/JaseZiv/worldfootballR/issues/47)) +* `tm_get_suspensions()` and `tm_get_risk_of_suspensions()` added. ([#411](https://github.com/JaseZiv/worldfootballR/issues/411)) + *** From d6b960a49063c8ca2b32d46f1d0ba5a41209baac Mon Sep 17 00:00:00 2001 From: Tony ElHabr Date: Fri, 7 Feb 2025 10:57:49 -0600 Subject: [PATCH 9/9] add examples and vignette code --- R/tm_league_suspensions.R | 16 ++++++++++++++-- man/tm_get_risk_of_suspension.Rd | 8 ++++++++ man/tm_get_suspensions.Rd | 8 ++++++++ vignettes/extract-transfermarkt-data.Rmd | 21 ++++++++++++++++++++- 4 files changed, 50 insertions(+), 3 deletions(-) diff --git a/R/tm_league_suspensions.R b/R/tm_league_suspensions.R index d053f496..ac9678ab 100644 --- a/R/tm_league_suspensions.R +++ b/R/tm_league_suspensions.R @@ -8,7 +8,13 @@ #' @importFrom magrittr %>% #' @importFrom rlang .data #' @export - +#' @examples +#' \dontrun{ +#' try({ +#' tm_get_suspensions(country_name = "England") +#' tm_get_suspensions(league_url = "https://www.transfermarkt.com/premier-league/sperrenausfaelle/wettbewerb/GB1") +#' }) +#' } tm_get_suspensions <- function(country_name = NA, league_url = NA) { main_url <- "https://www.transfermarkt.com" @@ -142,7 +148,13 @@ tm_get_suspensions <- function(country_name = NA, league_url = NA) { #' @inheritParams tm_get_suspensions #' @return A data frame of players at risk of suspension #' @export - +#' @examples +#' \dontrun{ +#' try({ +#' tm_get_risk_of_suspension(country_name = "England") +#' tm_get_risk_of_suspension(league_url = "https://www.transfermarkt.com/premier-league/sperrenausfaelle/wettbewerb/GB1") +#' }) +#' } tm_get_risk_of_suspension <- function(country_name, league_url = NA) { main_url <- "https://www.transfermarkt.com" diff --git a/man/tm_get_risk_of_suspension.Rd b/man/tm_get_risk_of_suspension.Rd index 444dc9bf..b3137fc1 100644 --- a/man/tm_get_risk_of_suspension.Rd +++ b/man/tm_get_risk_of_suspension.Rd @@ -17,3 +17,11 @@ A data frame of players at risk of suspension \description{ Returns a data frame of players at risk of suspension } +\examples{ +\dontrun{ +try({ +tm_get_risk_of_suspension(country_name = "England") +tm_get_risk_of_suspension(league_url = "https://www.transfermarkt.com/premier-league/sperrenausfaelle/wettbewerb/GB1") +}) +} +} diff --git a/man/tm_get_suspensions.Rd b/man/tm_get_suspensions.Rd index 57657ee8..41091cc6 100644 --- a/man/tm_get_suspensions.Rd +++ b/man/tm_get_suspensions.Rd @@ -17,3 +17,11 @@ A data frame of suspended players \description{ Returns a data frame of all suspended players in a selected league } +\examples{ +\dontrun{ +try({ +tm_get_suspensions(country_name = "England") +tm_get_suspensions(league_url = "https://www.transfermarkt.com/premier-league/sperrenausfaelle/wettbewerb/GB1") +}) +} +} diff --git a/vignettes/extract-transfermarkt-data.Rmd b/vignettes/extract-transfermarkt-data.Rmd index 84a25d1b..2bfc7da1 100644 --- a/vignettes/extract-transfermarkt-data.Rmd +++ b/vignettes/extract-transfermarkt-data.Rmd @@ -127,7 +127,7 @@ This section will cover the functions to aid in the extraction of season team st ### League Table by Matchdays -To be able to extract league tables for select matchday(s), the below function can be used. +To be able to extract league tables for select matchday(s), the below function can be used. The function can accept either the country name, season start year and matchday number(s), or for leagues not contained in the [worldfootballR_data](https://github.com/JaseZiv/worldfootballR_data/blob/master/raw-data/transfermarkt_leagues/main_comp_seasons.csv) repository, it can accept the league URL, season start year and matchday number(s). @@ -392,5 +392,24 @@ dplyr::glimpse(epl_gk_coach_job_histories) ``` +### Player Suspensions +You can extract players with current suspensions in a league using `tm_get_suspensions()`. +The list of countries that can be passed to the `country_name` argument can be found [here](https://github.com/JaseZiv/worldfootballR_data/blob/master/raw-data/transfermarkt_leagues/main_comp_seasons.csv). `league_url` can also be specified--the same file has a non-exhaustive list of valid URLs in `comp_url`. You may need to specify `league_url` for non-first tier leagues. + +```{r tm_get_suspensions} +england_suspensions <- tm_get_suspensions(country_name = "England") +epl_suspensions <- tm_get_suspensions(league_url = "https://www.transfermarkt.com/premier-league/sperrenausfaelle/wettbewerb/GB1") +dplyr::glimpse(epl_suspensions) +``` + +### Players at Risk of Suspension + +You can extract players at risk of being suspended (e.g. accumulating yellow cards) in a league using `tm_get_risk_of_suspension()`. The function arguments work in the same way as they do for `tm_get_suspensions()`. + +```{r tm_get_risk_of_suspension} +england_risk_of_suspension <- tm_get_risk_of_suspension(country_name = "England") +epl_risk_of_suspension <- tm_get_risk_of_suspension(league_url = "https://www.transfermarkt.com/premier-league/sperrenausfaelle/wettbewerb/GB1") +dplyr::glimpse(epl_risk_of_suspension) +```