Skip to content

Commit

Permalink
Merge pull request #416 from JaseZiv/update-tm_player_bio
Browse files Browse the repository at this point in the history
Add extra player bio info from transfermarkt [Closes #141] v2
  • Loading branch information
tonyelhabr authored Feb 11, 2025
2 parents 0282485 + b2f8e88 commit 4d6bd1a
Show file tree
Hide file tree
Showing 3 changed files with 24 additions and 3 deletions.
2 changes: 1 addition & 1 deletion DESCRIPTION
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
Type: Package
Package: worldfootballR
Title: Extract and Clean World Football (Soccer) Data
Version: 0.6.7.0001
Version: 0.6.7.0002
Authors@R: c(
person("Jason", "Zivkovic", , "[email protected]", role = c("aut", "cre", "cph")),
person("Tony", "ElHabr", , "[email protected]", role = "ctb"),
Expand Down
1 change: 1 addition & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@

* `tm_get_suspensions()` and `tm_get_risk_of_suspensions()` added. (0.6.7.0000) ([#411](https://github.com/JaseZiv/worldfootballR/issues/411))
* `fb_player_info()` added. (0.6.7.0001) ([#47](https://github.com/JaseZiv/worldfootballR/issues/47))
* `tm_player_bio()` now includes three new columns: `picture_url`, containing the URL of the player's picture from Transfermarkt; `squad_number`, containing the player's current squad number; and `player_id`, containing the player's ID on Transfermarkt. (0.6.7.0002) ([#141](https://github.com/JaseZiv/worldfootballR/issues/141))

***

Expand Down
24 changes: 22 additions & 2 deletions R/tm_player_bio.R
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,20 @@ tm_player_bio <- function(player_urls) {

# print(glue::glue("Scraping player_bio for {player_name}"))

player_id <- stringr::str_extract(player_url, "(?<=spieler/)\\d+") %>% as.numeric()
player_picture_url <- tryCatch({
if(inherits(player_page, "xml_document")) {
player_page %>%
rvest::html_nodes(".data-header__profile-container img.data-header__profile-image") %>%
rvest::html_attr("src")
} else {
NA_character_
}
}, error = function(e) {
NA_character_
}) %>%
.replace_empty_na()

X1 <- player_page %>% rvest::html_nodes(".info-table__content--regular") %>% rvest::html_text() %>% stringr::str_squish() %>% .replace_empty_na()
X2 <- player_page %>% rvest::html_nodes(".info-table__content--bold") %>% rvest::html_text() %>% stringr::str_squish() %>% .replace_empty_na()

Expand All @@ -61,14 +75,20 @@ tm_player_bio <- function(player_urls) {
val_df <- data.frame(X1=c("player_valuation", "max_player_valuation", "max_player_valuation_date"), X2=c(player_val, player_val_max, player_val_max_date))
a <- rbind(a, val_df)

squad_number_url <- gsub("profil", "rueckennummern", player_url)
squad_number_page <- tryCatch(xml2::read_html(squad_number_url), error = function(e) NA)
squad_number <- squad_number_page %>% rvest::html_node("tbody tr:first-child td:nth-child(4)") %>% rvest::html_text(trim = TRUE)

a <- a %>%
dplyr::mutate(player_name = player_name) %>%
dplyr::mutate(player_name = player_name, player_id = player_id) %>%
tidyr::pivot_wider(names_from = .data[["X1"]], values_from = .data[["X2"]]) %>%
janitor::clean_names() %>%
dplyr::mutate(squad_number = squad_number) %>%
dplyr::mutate(player_valuation = .convert_value_to_numeric(euro_value = .data[["player_valuation"]]),
max_player_valuation = .convert_value_to_numeric(euro_value = .data[["max_player_valuation"]]),
max_player_valuation_date = .tm_fix_dates(dirty_dates = .data[["max_player_valuation_date"]])) %>%
dplyr::mutate(URL = player_url)
dplyr::mutate(URL = player_url, picture_url = player_picture_url) %>%
dplyr::select(-na)
} else {
a <- data.frame()
}
Expand Down

0 comments on commit 4d6bd1a

Please sign in to comment.