diff --git a/DESCRIPTION b/DESCRIPTION index b1fe69b4..a44491ae 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,7 +1,7 @@ Type: Package Package: worldfootballR Title: Extract and Clean World Football (Soccer) Data -Version: 0.6.6.0001 +Version: 0.6.7 Authors@R: c( person("Jason", "Zivkovic", , "jaseziv83@gmail.com", role = c("aut", "cre", "cph")), person("Tony", "ElHabr", , "anthonyelhabr@gmail.com", role = "ctb"), @@ -54,4 +54,4 @@ Suggests: rmarkdown, testthat Encoding: UTF-8 -RoxygenNote: 7.2.3 +RoxygenNote: 7.3.2 diff --git a/NAMESPACE b/NAMESPACE index 78dabe42..8d3fe6b2 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -45,6 +45,8 @@ export(player_dictionary_mapping) export(player_transfer_history) export(tm_expiring_contracts) export(tm_get_player_absence) +export(tm_get_risk_of_suspension) +export(tm_get_suspensions) export(tm_league_debutants) export(tm_league_injuries) export(tm_league_team_urls) diff --git a/NEWS.md b/NEWS.md index 6327c6b2..41dbeadb 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,4 +1,4 @@ -# worldfootballR (development version) +# worldfootballR 0.6.7 ### Bugs @@ -7,6 +7,10 @@ ### Improvements * `tm_expiring_contracts()` returns additional `date_of_birth` column. (0.6.6.0001) [#355](https://github.com/JaseZiv/worldfootballR/issues/397) +* `tm_player_bio()` now includes three new columns. `picture_url` containing the URL of the player's picture from Transfermarkt, `squad_number` containing the current worn squad number of the player and `player_id` that that contains the player ID on Transfermarkt. +* `get_player_info()` added. ([#47](https://github.com/JaseZiv/worldfootballR/issues/47)) +* `tm_get_suspensions()` and `tm_get_risk_of_suspensions()` added. ([#411](https://github.com/JaseZiv/worldfootballR/issues/411)) + *** diff --git a/R/tm_league_injuries.R b/R/tm_league_injuries.R index a9164730..77eeb938 100644 --- a/R/tm_league_injuries.R +++ b/R/tm_league_injuries.R @@ -1,6 +1,6 @@ #' Get league injuries #' -#' Returns a data frame of all currently injured players players for a selected league +#' Returns a data frame of all currently injured players for a selected league #' #' @param country_name the country of the league's players #' @param league_url league url from transfermarkt.com. To be used when country_name not available in main function diff --git a/R/tm_league_suspensions.R b/R/tm_league_suspensions.R new file mode 100644 index 00000000..ac9678ab --- /dev/null +++ b/R/tm_league_suspensions.R @@ -0,0 +1,245 @@ +#' Get League Suspensions +#' +#' Returns a data frame of all suspended players in a selected league +#' +#' @param country_name The country of the league (used if league_url not provided) +#' @param league_url Optional direct league URL from transfermarkt.com +#' @return A data frame of suspended players +#' @importFrom magrittr %>% +#' @importFrom rlang .data +#' @export +#' @examples +#' \dontrun{ +#' try({ +#' tm_get_suspensions(country_name = "England") +#' tm_get_suspensions(league_url = "https://www.transfermarkt.com/premier-league/sperrenausfaelle/wettbewerb/GB1") +#' }) +#' } +tm_get_suspensions <- function(country_name = NA, league_url = NA) { + main_url <- "https://www.transfermarkt.com" + + .replace_empty_na <- function(x) { + ifelse(is.na(x) | x == "", NA_character_, x) + } + + if (is.na(league_url) && is.na(country_name)) { + stop("You must provide either league_url or country_name") + } + + if (is.na(league_url)) { + tryCatch({ + meta_df <- utils::read.csv(url("https://raw.githubusercontent.com/JaseZiv/worldfootballR_data/master/raw-data/transfermarkt_leagues/main_comp_seasons.csv"), + stringsAsFactors = FALSE) + meta_filtered <- meta_df %>% dplyr::filter(.data[["country"]] == country_name) + + if (nrow(meta_filtered) == 0) { + stop(glue::glue("Country {country_name} not found in metadata")) + } + + comp_url <- meta_filtered$comp_url[1] + comp_name <- meta_filtered$comp_name[1] + country <- country_name + }, error = function(e) { + stop("Failed to retrieve league metadata") + }) + } else { + tryCatch({ + league_page <- xml2::read_html(league_url) + comp_url <- league_url + comp_name <- league_page %>% + rvest::html_nodes(".data-header__headline-wrapper--oswald") %>% + rvest::html_text() %>% + stringr::str_squish() + country <- league_page %>% + rvest::html_nodes(".data-header img") %>% + rvest::html_attr("alt") %>% + .[1] %>% + stringr::str_squish() + }, error = function(e) { + stop(glue::glue("Invalid league URL: {league_url}")) + }) + } + + suspensions_url <- comp_url %>% + gsub("startseite", "sperrenausfaelle", .) %>% + paste0(., "/plus/1") + + tryCatch({ + page <- xml2::read_html(suspensions_url) + }, error = function(e) { + warning(glue::glue("Failed to read suspensions page: {suspensions_url}")) + return(data.frame()) + }) + + suspensions_data <- tryCatch({ + boxes <- page %>% rvest::html_nodes("div.box") + suspensions_table <- boxes[[1]] %>% rvest::html_node("table.items") + + rows <- suspensions_table %>% + rvest::html_nodes("tbody tr") %>% + purrr::keep(~length(rvest::html_node(.x, "table.inline-table")) > 0) + + purrr::map_df(rows, ~{ + player_info <- .x %>% rvest::html_node("td:first-child table.inline-table") + + tibble::tibble( + Player = tryCatch(player_info %>% + rvest::html_node("td:nth-child(2) a") %>% + rvest::html_text(trim = TRUE), + error = function(e) NA_character_), + Position = tryCatch(player_info %>% + rvest::html_node("tr:nth-child(2) td") %>% + rvest::html_text(trim = TRUE), + error = function(e) NA_character_), + Club = tryCatch(.x %>% + rvest::html_node("td:nth-child(2) a img") %>% + rvest::html_attr("title"), + error = function(e) NA_character_), + Age = tryCatch(.x %>% + rvest::html_node("td:nth-child(3)") %>% + rvest::html_text(trim = TRUE) %>% + as.numeric(), + error = function(e) NA_real_), + Reason = tryCatch(.x %>% + rvest::html_node("td:nth-child(4)") %>% + rvest::html_text(trim = TRUE), + error = function(e) NA_character_), + Since = tryCatch(.x %>% + rvest::html_node("td:nth-child(5)") %>% + rvest::html_text(trim = TRUE) %>% + .tm_fix_dates() %>% + as.Date(), + error = function(e) NA_Date_), + Until = tryCatch(.x %>% + rvest::html_node("td:nth-child(6)") %>% + rvest::html_text(trim = TRUE) %>% + .tm_fix_dates() %>% + as.Date(), + error = function(e) NA_Date_), + Matches_Missed = tryCatch(.x %>% + rvest::html_node("td:nth-child(7)") %>% + rvest::html_text(trim = TRUE) %>% + as.numeric(), + error = function(e) NA_real_) + ) + }) %>% + dplyr::mutate(dplyr::across(where(is.character), .replace_empty_na)) + }, error = function(e) { + warning("Failed to extract suspension data") + return(data.frame()) + }) + + if (nrow(suspensions_data) > 0) { + suspensions_data <- suspensions_data %>% + dplyr::mutate( + Country = ifelse(is.na(league_url), country_name, country), + Competition = comp_name, + .before = 1 + ) + } + + return(suspensions_data) +} + +#' Get League Risk of Suspension +#' +#' Returns a data frame of players at risk of suspension +#' +#' @inheritParams tm_get_suspensions +#' @return A data frame of players at risk of suspension +#' @export +#' @examples +#' \dontrun{ +#' try({ +#' tm_get_risk_of_suspension(country_name = "England") +#' tm_get_risk_of_suspension(league_url = "https://www.transfermarkt.com/premier-league/sperrenausfaelle/wettbewerb/GB1") +#' }) +#' } +tm_get_risk_of_suspension <- function(country_name, league_url = NA) { + main_url <- "https://www.transfermarkt.com" + + .replace_empty_na <- function(x) { + ifelse(is.na(x) | x == "", NA_character_, x) + } + + if(is.na(league_url)) { + tryCatch({ + meta_df <- utils::read.csv(url("https://raw.githubusercontent.com/JaseZiv/worldfootballR_data/master/raw-data/transfermarkt_leagues/main_comp_seasons.csv"), + stringsAsFactors = FALSE) + meta_filtered <- meta_df %>% dplyr::filter(.data[["country"]] == country_name) + comp_url <- meta_filtered$comp_url[1] + comp_name <- meta_filtered$comp_name[1] + }, error = function(e) { + stop("Failed to retrieve league metadata") + }) + } else { + tryCatch({ + league_page <- xml2::read_html(league_url) + comp_url <- league_url + comp_name <- league_page %>% + rvest::html_nodes(".data-header__headline-wrapper--oswald") %>% + rvest::html_text() %>% + stringr::str_squish() + country <- league_page %>% + rvest::html_nodes(".data-header img") %>% + rvest::html_attr("alt") %>% + .[1] %>% + stringr::str_squish() + }, error = function(e) { + stop(glue::glue("Invalid league URL: {league_url}")) + }) + } + + risk_url <- comp_url %>% + gsub("startseite", "sperrenausfaelle", .) %>% + paste0(., "/plus/1") + + tryCatch({ + page <- xml2::read_html(risk_url) + }, error = function(e) { + warning(glue::glue("Failed to read risk page: {risk_url}")) + return(data.frame()) + }) + + risk_data <- tryCatch({ + boxes <- page %>% rvest::html_nodes("div.box") + risk_table <- boxes[[2]] %>% rvest::html_node("table.items") + + rows <- risk_table %>% + rvest::html_nodes("tbody tr") %>% + purrr::keep(~length(rvest::html_node(.x, "table.inline-table")) > 0) + + purrr::map_df(rows, ~{ + player_info <- .x %>% rvest::html_node("td:first-child table.inline-table") + + tibble::tibble( + Player = tryCatch(player_info %>% rvest::html_node("td:nth-child(2) a") %>% + rvest::html_text(trim = TRUE), error = function(e) NA), + Position = tryCatch(player_info %>% rvest::html_node("tr:nth-child(2) td") %>% + rvest::html_text(trim = TRUE), error = function(e) NA), + Club = tryCatch(.x %>% rvest::html_node("td:nth-child(2) a img") %>% + rvest::html_attr("title") %>% trimws(), error = function(e) NA), + Age = tryCatch(.x %>% rvest::html_node("td:nth-child(3)") %>% + rvest::html_text(trim = TRUE) %>% as.numeric(), error = function(e) NA), + Yellow_Cards = tryCatch(.x %>% rvest::html_node("td:nth-child(4)") %>% + rvest::html_text(trim = TRUE) %>% as.numeric(), error = function(e) NA) + ) + }) %>% + dplyr::mutate(dplyr::across(where(is.character), .replace_empty_na)) + }, error = function(e) { + warning("Failed to extract risk data") + return(data.frame()) + }) + + if(is.na(league_url)) { + risk_data %>% + dplyr::mutate(Country = country_name, + Competition = comp_name, + .before = 1) + } else { + risk_data %>% + dplyr::mutate(Country = country, + Competition = comp_name, + .before = 1) + } +} diff --git a/man/tm_get_risk_of_suspension.Rd b/man/tm_get_risk_of_suspension.Rd new file mode 100644 index 00000000..b3137fc1 --- /dev/null +++ b/man/tm_get_risk_of_suspension.Rd @@ -0,0 +1,27 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/tm_league_suspensions.R +\name{tm_get_risk_of_suspension} +\alias{tm_get_risk_of_suspension} +\title{Get League Risk of Suspension} +\usage{ +tm_get_risk_of_suspension(country_name, league_url = NA) +} +\arguments{ +\item{country_name}{The country of the league (used if league_url not provided)} + +\item{league_url}{Optional direct league URL from transfermarkt.com} +} +\value{ +A data frame of players at risk of suspension +} +\description{ +Returns a data frame of players at risk of suspension +} +\examples{ +\dontrun{ +try({ +tm_get_risk_of_suspension(country_name = "England") +tm_get_risk_of_suspension(league_url = "https://www.transfermarkt.com/premier-league/sperrenausfaelle/wettbewerb/GB1") +}) +} +} diff --git a/man/tm_get_suspensions.Rd b/man/tm_get_suspensions.Rd new file mode 100644 index 00000000..41091cc6 --- /dev/null +++ b/man/tm_get_suspensions.Rd @@ -0,0 +1,27 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/tm_league_suspensions.R +\name{tm_get_suspensions} +\alias{tm_get_suspensions} +\title{Get League Suspensions} +\usage{ +tm_get_suspensions(country_name = NA, league_url = NA) +} +\arguments{ +\item{country_name}{The country of the league (used if league_url not provided)} + +\item{league_url}{Optional direct league URL from transfermarkt.com} +} +\value{ +A data frame of suspended players +} +\description{ +Returns a data frame of all suspended players in a selected league +} +\examples{ +\dontrun{ +try({ +tm_get_suspensions(country_name = "England") +tm_get_suspensions(league_url = "https://www.transfermarkt.com/premier-league/sperrenausfaelle/wettbewerb/GB1") +}) +} +} diff --git a/man/tm_league_injuries.Rd b/man/tm_league_injuries.Rd index 8341b445..80c51f70 100644 --- a/man/tm_league_injuries.Rd +++ b/man/tm_league_injuries.Rd @@ -15,5 +15,5 @@ tm_league_injuries(country_name, league_url = NA) returns a dataframe of injured players in the selected league } \description{ -Returns a data frame of all currently injured players players for a selected league +Returns a data frame of all currently injured players for a selected league } diff --git a/tests/testthat/test-transfermarkt.R b/tests/testthat/test-transfermarkt.R index acf7fb8e..e6ade09f 100644 --- a/tests/testthat/test-transfermarkt.R +++ b/tests/testthat/test-transfermarkt.R @@ -252,3 +252,22 @@ test_that("tm_get_player_absence() works", { }) +test_that("tm_get_suspensions() works", { + testthat::skip_on_cran() + + player_suspensions <- tm_get_suspensions(league_url = "https://www.transfermarkt.com/jupiler-pro-league/sperrenausfaelle/wettbewerb/BE1") + expect_type(player_suspensions, "list") + expect_equal(ncol(player_suspensions), 10) + expect_false(nrow(player_suspensions) == 0) +}) + + +test_that("tm_get_risk_of_suspensions() works", { + testthat::skip_on_cran() + + player_stats <- tm_get_risk_of_suspension(league_url = "https://www.transfermarkt.com/jupiler-pro-league/sperrenausfaelle/wettbewerb/BE1") + expect_type(player_stats, "list") + expect_equal(ncol(player_stats), 7) + expect_false(nrow(player_stats) == 0) + +}) diff --git a/vignettes/extract-transfermarkt-data.Rmd b/vignettes/extract-transfermarkt-data.Rmd index 84a25d1b..2bfc7da1 100644 --- a/vignettes/extract-transfermarkt-data.Rmd +++ b/vignettes/extract-transfermarkt-data.Rmd @@ -127,7 +127,7 @@ This section will cover the functions to aid in the extraction of season team st ### League Table by Matchdays -To be able to extract league tables for select matchday(s), the below function can be used. +To be able to extract league tables for select matchday(s), the below function can be used. The function can accept either the country name, season start year and matchday number(s), or for leagues not contained in the [worldfootballR_data](https://github.com/JaseZiv/worldfootballR_data/blob/master/raw-data/transfermarkt_leagues/main_comp_seasons.csv) repository, it can accept the league URL, season start year and matchday number(s). @@ -392,5 +392,24 @@ dplyr::glimpse(epl_gk_coach_job_histories) ``` +### Player Suspensions +You can extract players with current suspensions in a league using `tm_get_suspensions()`. +The list of countries that can be passed to the `country_name` argument can be found [here](https://github.com/JaseZiv/worldfootballR_data/blob/master/raw-data/transfermarkt_leagues/main_comp_seasons.csv). `league_url` can also be specified--the same file has a non-exhaustive list of valid URLs in `comp_url`. You may need to specify `league_url` for non-first tier leagues. + +```{r tm_get_suspensions} +england_suspensions <- tm_get_suspensions(country_name = "England") +epl_suspensions <- tm_get_suspensions(league_url = "https://www.transfermarkt.com/premier-league/sperrenausfaelle/wettbewerb/GB1") +dplyr::glimpse(epl_suspensions) +``` + +### Players at Risk of Suspension + +You can extract players at risk of being suspended (e.g. accumulating yellow cards) in a league using `tm_get_risk_of_suspension()`. The function arguments work in the same way as they do for `tm_get_suspensions()`. + +```{r tm_get_risk_of_suspension} +england_risk_of_suspension <- tm_get_risk_of_suspension(country_name = "England") +epl_risk_of_suspension <- tm_get_risk_of_suspension(league_url = "https://www.transfermarkt.com/premier-league/sperrenausfaelle/wettbewerb/GB1") +dplyr::glimpse(epl_risk_of_suspension) +```