Skip to content

Commit

Permalink
Merge pull request #412 from aymennasri/add_suspensions
Browse files Browse the repository at this point in the history
Add suspensions  from transfermarkt [Closes #411]
  • Loading branch information
tonyelhabr authored Feb 9, 2025
2 parents 71c95b9 + d6b960a commit 75a463f
Show file tree
Hide file tree
Showing 10 changed files with 349 additions and 6 deletions.
4 changes: 2 additions & 2 deletions DESCRIPTION
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
Type: Package
Package: worldfootballR
Title: Extract and Clean World Football (Soccer) Data
Version: 0.6.6.0001
Version: 0.6.7
Authors@R: c(
person("Jason", "Zivkovic", , "[email protected]", role = c("aut", "cre", "cph")),
person("Tony", "ElHabr", , "[email protected]", role = "ctb"),
Expand Down Expand Up @@ -54,4 +54,4 @@ Suggests:
rmarkdown,
testthat
Encoding: UTF-8
RoxygenNote: 7.2.3
RoxygenNote: 7.3.2
2 changes: 2 additions & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,8 @@ export(player_dictionary_mapping)
export(player_transfer_history)
export(tm_expiring_contracts)
export(tm_get_player_absence)
export(tm_get_risk_of_suspension)
export(tm_get_suspensions)
export(tm_league_debutants)
export(tm_league_injuries)
export(tm_league_team_urls)
Expand Down
6 changes: 5 additions & 1 deletion NEWS.md
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# worldfootballR (development version)
# worldfootballR 0.6.7

### Bugs

Expand All @@ -7,6 +7,10 @@
### Improvements

* `tm_expiring_contracts()` returns additional `date_of_birth` column. (0.6.6.0001) [#355](https://github.com/JaseZiv/worldfootballR/issues/397)
* `tm_player_bio()` now includes three new columns. `picture_url` containing the URL of the player's picture from Transfermarkt, `squad_number` containing the current worn squad number of the player and `player_id` that that contains the player ID on Transfermarkt.
* `get_player_info()` added. ([#47](https://github.com/JaseZiv/worldfootballR/issues/47))
* `tm_get_suspensions()` and `tm_get_risk_of_suspensions()` added. ([#411](https://github.com/JaseZiv/worldfootballR/issues/411))


***

Expand Down
2 changes: 1 addition & 1 deletion R/tm_league_injuries.R
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
#' Get league injuries
#'
#' Returns a data frame of all currently injured players players for a selected league
#' Returns a data frame of all currently injured players for a selected league
#'
#' @param country_name the country of the league's players
#' @param league_url league url from transfermarkt.com. To be used when country_name not available in main function
Expand Down
245 changes: 245 additions & 0 deletions R/tm_league_suspensions.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,245 @@
#' Get League Suspensions
#'
#' Returns a data frame of all suspended players in a selected league
#'
#' @param country_name The country of the league (used if league_url not provided)
#' @param league_url Optional direct league URL from transfermarkt.com
#' @return A data frame of suspended players
#' @importFrom magrittr %>%
#' @importFrom rlang .data
#' @export
#' @examples
#' \dontrun{
#' try({
#' tm_get_suspensions(country_name = "England")
#' tm_get_suspensions(league_url = "https://www.transfermarkt.com/premier-league/sperrenausfaelle/wettbewerb/GB1")
#' })
#' }
tm_get_suspensions <- function(country_name = NA, league_url = NA) {
main_url <- "https://www.transfermarkt.com"

.replace_empty_na <- function(x) {
ifelse(is.na(x) | x == "", NA_character_, x)
}

if (is.na(league_url) && is.na(country_name)) {
stop("You must provide either league_url or country_name")
}

if (is.na(league_url)) {
tryCatch({
meta_df <- utils::read.csv(url("https://raw.githubusercontent.com/JaseZiv/worldfootballR_data/master/raw-data/transfermarkt_leagues/main_comp_seasons.csv"),
stringsAsFactors = FALSE)
meta_filtered <- meta_df %>% dplyr::filter(.data[["country"]] == country_name)

if (nrow(meta_filtered) == 0) {
stop(glue::glue("Country {country_name} not found in metadata"))
}

comp_url <- meta_filtered$comp_url[1]
comp_name <- meta_filtered$comp_name[1]
country <- country_name
}, error = function(e) {
stop("Failed to retrieve league metadata")
})
} else {
tryCatch({
league_page <- xml2::read_html(league_url)
comp_url <- league_url
comp_name <- league_page %>%
rvest::html_nodes(".data-header__headline-wrapper--oswald") %>%
rvest::html_text() %>%
stringr::str_squish()
country <- league_page %>%
rvest::html_nodes(".data-header img") %>%
rvest::html_attr("alt") %>%
.[1] %>%
stringr::str_squish()
}, error = function(e) {
stop(glue::glue("Invalid league URL: {league_url}"))
})
}

suspensions_url <- comp_url %>%
gsub("startseite", "sperrenausfaelle", .) %>%
paste0(., "/plus/1")

tryCatch({
page <- xml2::read_html(suspensions_url)
}, error = function(e) {
warning(glue::glue("Failed to read suspensions page: {suspensions_url}"))
return(data.frame())
})

suspensions_data <- tryCatch({
boxes <- page %>% rvest::html_nodes("div.box")
suspensions_table <- boxes[[1]] %>% rvest::html_node("table.items")

rows <- suspensions_table %>%
rvest::html_nodes("tbody tr") %>%
purrr::keep(~length(rvest::html_node(.x, "table.inline-table")) > 0)

purrr::map_df(rows, ~{
player_info <- .x %>% rvest::html_node("td:first-child table.inline-table")

tibble::tibble(
Player = tryCatch(player_info %>%
rvest::html_node("td:nth-child(2) a") %>%
rvest::html_text(trim = TRUE),
error = function(e) NA_character_),
Position = tryCatch(player_info %>%
rvest::html_node("tr:nth-child(2) td") %>%
rvest::html_text(trim = TRUE),
error = function(e) NA_character_),
Club = tryCatch(.x %>%
rvest::html_node("td:nth-child(2) a img") %>%
rvest::html_attr("title"),
error = function(e) NA_character_),
Age = tryCatch(.x %>%
rvest::html_node("td:nth-child(3)") %>%
rvest::html_text(trim = TRUE) %>%
as.numeric(),
error = function(e) NA_real_),
Reason = tryCatch(.x %>%
rvest::html_node("td:nth-child(4)") %>%
rvest::html_text(trim = TRUE),
error = function(e) NA_character_),
Since = tryCatch(.x %>%
rvest::html_node("td:nth-child(5)") %>%
rvest::html_text(trim = TRUE) %>%
.tm_fix_dates() %>%
as.Date(),
error = function(e) NA_Date_),
Until = tryCatch(.x %>%
rvest::html_node("td:nth-child(6)") %>%
rvest::html_text(trim = TRUE) %>%
.tm_fix_dates() %>%
as.Date(),
error = function(e) NA_Date_),
Matches_Missed = tryCatch(.x %>%
rvest::html_node("td:nth-child(7)") %>%
rvest::html_text(trim = TRUE) %>%
as.numeric(),
error = function(e) NA_real_)
)
}) %>%
dplyr::mutate(dplyr::across(where(is.character), .replace_empty_na))
}, error = function(e) {
warning("Failed to extract suspension data")
return(data.frame())
})

if (nrow(suspensions_data) > 0) {
suspensions_data <- suspensions_data %>%
dplyr::mutate(
Country = ifelse(is.na(league_url), country_name, country),
Competition = comp_name,
.before = 1
)
}

return(suspensions_data)
}

#' Get League Risk of Suspension
#'
#' Returns a data frame of players at risk of suspension
#'
#' @inheritParams tm_get_suspensions
#' @return A data frame of players at risk of suspension
#' @export
#' @examples
#' \dontrun{
#' try({
#' tm_get_risk_of_suspension(country_name = "England")
#' tm_get_risk_of_suspension(league_url = "https://www.transfermarkt.com/premier-league/sperrenausfaelle/wettbewerb/GB1")
#' })
#' }
tm_get_risk_of_suspension <- function(country_name, league_url = NA) {
main_url <- "https://www.transfermarkt.com"

.replace_empty_na <- function(x) {
ifelse(is.na(x) | x == "", NA_character_, x)
}

if(is.na(league_url)) {
tryCatch({
meta_df <- utils::read.csv(url("https://raw.githubusercontent.com/JaseZiv/worldfootballR_data/master/raw-data/transfermarkt_leagues/main_comp_seasons.csv"),
stringsAsFactors = FALSE)
meta_filtered <- meta_df %>% dplyr::filter(.data[["country"]] == country_name)
comp_url <- meta_filtered$comp_url[1]
comp_name <- meta_filtered$comp_name[1]
}, error = function(e) {
stop("Failed to retrieve league metadata")
})
} else {
tryCatch({
league_page <- xml2::read_html(league_url)
comp_url <- league_url
comp_name <- league_page %>%
rvest::html_nodes(".data-header__headline-wrapper--oswald") %>%
rvest::html_text() %>%
stringr::str_squish()
country <- league_page %>%
rvest::html_nodes(".data-header img") %>%
rvest::html_attr("alt") %>%
.[1] %>%
stringr::str_squish()
}, error = function(e) {
stop(glue::glue("Invalid league URL: {league_url}"))
})
}

risk_url <- comp_url %>%
gsub("startseite", "sperrenausfaelle", .) %>%
paste0(., "/plus/1")

tryCatch({
page <- xml2::read_html(risk_url)
}, error = function(e) {
warning(glue::glue("Failed to read risk page: {risk_url}"))
return(data.frame())
})

risk_data <- tryCatch({
boxes <- page %>% rvest::html_nodes("div.box")
risk_table <- boxes[[2]] %>% rvest::html_node("table.items")

rows <- risk_table %>%
rvest::html_nodes("tbody tr") %>%
purrr::keep(~length(rvest::html_node(.x, "table.inline-table")) > 0)

purrr::map_df(rows, ~{
player_info <- .x %>% rvest::html_node("td:first-child table.inline-table")

tibble::tibble(
Player = tryCatch(player_info %>% rvest::html_node("td:nth-child(2) a") %>%
rvest::html_text(trim = TRUE), error = function(e) NA),
Position = tryCatch(player_info %>% rvest::html_node("tr:nth-child(2) td") %>%
rvest::html_text(trim = TRUE), error = function(e) NA),
Club = tryCatch(.x %>% rvest::html_node("td:nth-child(2) a img") %>%
rvest::html_attr("title") %>% trimws(), error = function(e) NA),
Age = tryCatch(.x %>% rvest::html_node("td:nth-child(3)") %>%
rvest::html_text(trim = TRUE) %>% as.numeric(), error = function(e) NA),
Yellow_Cards = tryCatch(.x %>% rvest::html_node("td:nth-child(4)") %>%
rvest::html_text(trim = TRUE) %>% as.numeric(), error = function(e) NA)
)
}) %>%
dplyr::mutate(dplyr::across(where(is.character), .replace_empty_na))
}, error = function(e) {
warning("Failed to extract risk data")
return(data.frame())
})

if(is.na(league_url)) {
risk_data %>%
dplyr::mutate(Country = country_name,
Competition = comp_name,
.before = 1)
} else {
risk_data %>%
dplyr::mutate(Country = country,
Competition = comp_name,
.before = 1)
}
}
27 changes: 27 additions & 0 deletions man/tm_get_risk_of_suspension.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

27 changes: 27 additions & 0 deletions man/tm_get_suspensions.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion man/tm_league_injuries.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

19 changes: 19 additions & 0 deletions tests/testthat/test-transfermarkt.R
Original file line number Diff line number Diff line change
Expand Up @@ -252,3 +252,22 @@ test_that("tm_get_player_absence() works", {
})


test_that("tm_get_suspensions() works", {
testthat::skip_on_cran()

player_suspensions <- tm_get_suspensions(league_url = "https://www.transfermarkt.com/jupiler-pro-league/sperrenausfaelle/wettbewerb/BE1")
expect_type(player_suspensions, "list")
expect_equal(ncol(player_suspensions), 10)
expect_false(nrow(player_suspensions) == 0)
})


test_that("tm_get_risk_of_suspensions() works", {
testthat::skip_on_cran()

player_stats <- tm_get_risk_of_suspension(league_url = "https://www.transfermarkt.com/jupiler-pro-league/sperrenausfaelle/wettbewerb/BE1")
expect_type(player_stats, "list")
expect_equal(ncol(player_stats), 7)
expect_false(nrow(player_stats) == 0)

})
Loading

0 comments on commit 75a463f

Please sign in to comment.