Skip to content

Commit

Permalink
html_node -> html_element, match_player return cleanup, revert roxygen
Browse files Browse the repository at this point in the history
  • Loading branch information
tonyelhabr committed Jul 9, 2024
1 parent 73ed93c commit 229cd51
Show file tree
Hide file tree
Showing 5 changed files with 42 additions and 35 deletions.
2 changes: 1 addition & 1 deletion DESCRIPTION
Original file line number Diff line number Diff line change
Expand Up @@ -54,4 +54,4 @@ Suggests:
rmarkdown,
testthat
Encoding: UTF-8
RoxygenNote: 7.3.1
RoxygenNote: 7.2.3
2 changes: 2 additions & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -118,9 +118,11 @@ importFrom(rvest,html_text)
importFrom(rvest,read_html)
importFrom(stats,runif)
importFrom(stats,setNames)
importFrom(stringi,stri_unescape_unicode)
importFrom(stringr,str_detect)
importFrom(stringr,str_replace_all)
importFrom(stringr,str_squish)
importFrom(stringr,str_subset)
importFrom(tibble,tibble)
importFrom(tidyr,crossing)
importFrom(tidyselect,vars_select_helpers)
Expand Down
66 changes: 35 additions & 31 deletions R/understat_match_players.R
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,11 @@
#' @return A `data.frame` with data for all players in the match.
#'
#' @importFrom magrittr %>%
#' @importFrom rvest html_elements
#' @importFrom stringi stri_unescape_unicode
#' @importFrom stringr str_subset
#' @importFrom jsonlite fromJSON
#' @importFrom dplyr bind_rows
#'
#' @export

Expand All @@ -16,12 +21,11 @@ understat_match_players <- function(match_url) {
match_id <- gsub("[^0-9]", "", match_url)

match_player_data <- .get_understat_json(page_url = match_url) %>%
rvest::html_nodes("script") %>%
rvest::html_elements("script") %>%
as.character()

match_player_data <- match_player_data[grep("rostersData\t=", match_player_data)] %>%
stringi::stri_unescape_unicode() %>%

substr(41,nchar(.)) %>%
substr(0,nchar(.)-13) %>%
paste0('[', . , ']') %>%
Expand All @@ -35,36 +39,36 @@ understat_match_players <- function(match_url) {
match_player_data_home <- do.call(rbind.data.frame, match_player_data$h)
match_player_data_away <- do.call(rbind.data.frame, match_player_data$a)

match_player_data <- dplyr::bind_rows(match_player_data_home,match_player_data_away) %>%

dplyr::mutate(match_id = as.integer(match_id),
team_id = as.integer(team_id),
team_status = as.character(team_status),
player_id = as.integer(player_id),
swap_id = as.integer(id),
player = as.character(player),
position = as.character(position),
positionOrder = as.integer(positionOrder),
time_played = as.integer(time),
goals = as.integer(goals),
own_goals = as.integer(own_goals),
shots = as.integer(shots),
xG = as.numeric(xG),
yellow_card = as.integer(yellow_card),
red_card = as.integer(red_card),
roster_in = as.integer(roster_in),
roster_out = as.integer(roster_out),
key_passes = as.integer(key_passes),
assists = as.integer(assists),
xA = as.numeric(xA),
xGChain = as.numeric(xGChain),
XGBuildup = as.numeric(XGBuildup)) %>%

dplyr::select(-c(time,id)) %>%

dplyr::mutate(team_status = ifelse(team_status=="h","home","away"))

return(match_player_data)
match_player_data_rebind <- dplyr::bind_rows(match_player_data_home, match_player_data_away)

match_players <- data.frame(
match_id = as.integer(match_id),
id = as.integer(match_player_data_rebind[["id"]]),
team_id = as.integer(match_player_data_rebind[["team_id"]]),
home_away = as.character(match_player_data_rebind[["h_a"]]),
player_id = as.integer(match_player_data_rebind[["player_id"]]),
swap_id = as.integer(match_player_data_rebind[["id"]]),
player = as.character(match_player_data_rebind[["player"]]),
position = as.character(match_player_data_rebind[["position"]]),
positionOrder = as.integer(match_player_data_rebind[["positionOrder"]]),
time_played = as.integer(match_player_data_rebind[["time"]]),
goals = as.integer(match_player_data_rebind[["goals"]]),
own_goals = as.integer(match_player_data_rebind[["own_goals"]]),
shots = as.integer(match_player_data_rebind[["shots"]]),
xG = as.numeric(match_player_data_rebind[["xG"]]),
yellow_card = as.integer(match_player_data_rebind[["yellow_card"]]),
red_card = as.integer(match_player_data_rebind[["red_card"]]),
roster_in = as.integer(match_player_data_rebind[["roster_in"]]),
roster_out = as.integer(match_player_data_rebind[["roster_out"]]),
key_passes = as.integer(match_player_data_rebind[["key_passes"]]),
assists = as.integer(match_player_data_rebind[["assists"]]),
xA = as.numeric(match_player_data_rebind[["xA"]]),
xGChain = as.numeric(match_player_data_rebind[["xGChain"]]),
xGBuildup = as.numeric(match_player_data_rebind[["xGBuildup"]])
)

return(match_players)
}


5 changes: 3 additions & 2 deletions R/understat_match_stats.R
Original file line number Diff line number Diff line change
Expand Up @@ -10,14 +10,15 @@
#' @details For `draw_chances`, `home_chances` and `away_chances`, values below 10% in the browser will be retrieved as `NA` (e.g. a "5%" chance will be `NA` in the `data.frame`).
#'
#' @importFrom magrittr %>%
#' @importFrom rvest html_elements html_text
#'
#' @export

understat_match_stats <- function(match_url) {

match_stats <- .get_understat_json(page_url = match_url) %>%
rvest::html_nodes("div.scheme-block.is-hide[data-scheme='stats']") %>%
rvest::html_nodes(".progress-value") %>%
rvest::html_elements("div.scheme-block.is-hide[data-scheme='stats']") %>%
rvest::html_elements(".progress-value") %>%
rvest::html_text()

away <- match_stats[seq(1, length(match_stats), by=2)]
Expand Down
2 changes: 1 addition & 1 deletion tests/testthat/test-understat.R
Original file line number Diff line number Diff line change
Expand Up @@ -52,7 +52,7 @@ test_that("understat_match_players() works", {
testthat::skip_on_cran()
match_players <- understat_match_players(match_url = "https://understat.com/match/14789")
expect_true(any("data.frame" == class(match_players)))
expect_equal(ncol(match_players), 22)
expect_equal(ncol(match_players), 23)
expect_gt(nrow(match_players), 0)
})

Expand Down

0 comments on commit 229cd51

Please sign in to comment.