Skip to content

Commit

Permalink
Merge pull request #2 from jruizcabrejos/understat_match_
Browse files Browse the repository at this point in the history
Understat match review
  • Loading branch information
jruizcabrejos authored Jul 8, 2024
2 parents 75c19a3 + 4330219 commit be60541
Show file tree
Hide file tree
Showing 5 changed files with 92 additions and 55 deletions.
69 changes: 43 additions & 26 deletions R/understat_match_players.R
Original file line number Diff line number Diff line change
@@ -1,51 +1,68 @@

#' Get Understat match player data
#'
#' Returns player values for a selected match from Understat.com.
#'
#' @param match_url A `character` string with the URL of the match played.
#'
#' @return returns a `data.frame` with one row per player listed in the match
#'   rosters (home and away). The identifying columns come first (`match_id`,
#'   `team_id`, `team_status`, `player_id`, `swap_id`, `player`, `position`,
#'   `positionOrder`, `time_played`), followed by the remaining roster fields.
#'   `team_status` is either `"home"` or `"away"`.
#'
#' @importFrom magrittr %>%
#'
#' @export

understat_match_players <- function(match_url) {
  # .pkg_message("Scraping all shots for match {match_url}. Please acknowledge understat.com as the data source")

  # Every digit found in the URL is concatenated and used as the match id.
  id_from_url <- as.integer(gsub("[^0-9]", "", match_url))

  script_nodes <- .get_understat_json(page_url = match_url) %>%
    rvest::html_nodes("script") %>%
    as.character()

  # Keep only the <script> element(s) that assign `rostersData`.
  roster_scripts <- script_nodes[grep("rostersData\t=", script_nodes)]
  if (length(roster_scripts) == 0) {
    stop(
      "No `rostersData` script found on page: ", match_url,
      call. = FALSE
    )
  }

  # The fixed offsets (41 leading / 13 trailing characters) trim the text
  # surrounding the JSON payload.
  # NOTE(review): these offsets depend on the exact page markup; re-check them
  # if Understat changes how the script block is written.
  roster_json <- roster_scripts %>%
    stringi::stri_unescape_unicode() %>%
    substr(41, nchar(.)) %>%
    substr(0, nchar(.) - 13) %>%
    paste0("[", ., "]") %>%
    unlist() %>%
    stringr::str_subset("\\[\\]", negate = TRUE)

  rosters <- lapply(roster_json, jsonlite::fromJSON) %>%
    do.call("rbind", .)

  # `h` and `a` hold the home and away rosters respectively.
  home_players <- do.call(rbind.data.frame, rosters$h)
  away_players <- do.call(rbind.data.frame, rosters$a)

  # any_of() skips names that are absent, so a field missing from the page
  # does not abort the whole conversion.
  integer_cols <- c(
    "team_id", "player_id", "positionOrder", "goals", "own_goals", "shots",
    "yellow_card", "red_card", "roster_in", "roster_out", "key_passes",
    "assists"
  )
  # NOTE(review): the build-up column is presumably spelled `xGBuildup`
  # (matching `xGChain`); `XGBuildup` is listed as well in case it is not.
  numeric_cols <- c("xG", "xA", "xGChain", "xGBuildup", "XGBuildup")
  character_cols <- c("player", "position")

  match_player_data <- dplyr::bind_rows(home_players, away_players) %>%
    dplyr::mutate(
      match_id = id_from_url,
      # The raw data flags the side as "h"/"a" in `h_a`.
      team_status = ifelse(h_a == "h", "home", "away"),
      swap_id = as.integer(id),
      time_played = as.integer(time),
      dplyr::across(dplyr::any_of(integer_cols), as.integer),
      dplyr::across(dplyr::any_of(numeric_cols), as.numeric),
      dplyr::across(dplyr::any_of(character_cols), as.character)
    ) %>%
    # Identifiers first, then everything else; the raw `time`, `id` and `h_a`
    # columns are dropped because they were re-exposed under clearer names.
    dplyr::select(
      match_id, team_id, team_status, player_id, swap_id,
      player, position, positionOrder, time_played,
      dplyr::everything(),
      -c(time, id, h_a)
    )

  match_player_data
}
Expand Down
47 changes: 24 additions & 23 deletions R/understat_match_stats.R
Original file line number Diff line number Diff line change
@@ -1,18 +1,19 @@

#' Get Understat match stats table data
#'
#' Returns the Stats values for a selected match from Understat.com
#' Returns the Stats values for a selected match from Understat.com.
#'
#' @param match_url the URL of the match played
#' @param match_url A `character` string with the URL of the match played.
#'
#' @return returns a dataframe with data from the stats table for the match
#' @return returns a `data.frame` with data from the stats table for the match.
#'
#' @details For the definition of each variable (e.g. PPDA), hover your mouse over it in the Stats section of the match page at understat.com. For `draw_chances`, `home_chances` and `away_chances`, values below 10% are retrieved as `NA`, even though the match page sometimes shows a value for them (e.g. "5%").
#'
#' @importFrom magrittr %>%
#'
#' @export

understat_match_stats <- function(match_url) {
# .pkg_message("Scraping all shots for match {match_url}. Please acknowledge understat.com as the data source")

match_stats <- .get_understat_json(page_url = match_url) %>%
rvest::html_nodes("div.scheme-block.is-hide[data-scheme='stats']") %>%
Expand All @@ -24,29 +25,29 @@ understat_match_stats <- function(match_url) {

match_stats <- data.frame(

match_id = gsub("[^0-9]", "", match_url),
match_id = as.integer(gsub("[^0-9]", "", match_url)),

home_team = away[1],
home_chances = away[2],
home_goals = home[3],
home_xG = home[4],
home_shots = home[5],
home_shot_on_target = home[6],
home_deep = home[7],
home_PPDA = home[8],
home_xPTS = home[9],
home_team = as.character(away[1]),
home_chances = as.integer(gsub("[^0-9]", "", away[2]))/100,
home_goals = as.integer(home[3]),
home_xG = as.numeric(home[4]),
home_shots = as.integer(home[5]),
home_shot_on_target = as.integer(home[6]),
home_deep = as.integer(home[7]),
home_PPDA = as.numeric(home[8]),
home_xPTS = as.numeric(home[9]),

draw_chances = home[2],
draw_chances = as.integer(gsub("[^0-9]", "", home[2]))/100,

away_team = home[1],
away_chances = away[3],
away_goals = away[4],
away_xG = away[5],
away_shots = away[6],
away_shot_on_target = away[7],
away_deep = away[8],
away_PPDA = away[9],
away_xPTS = away[10]
away_chances = as.integer(gsub("[^0-9]", "", away[3]))/100 ,
away_goals = as.integer(away[4]),
away_xG = as.numeric(away[5]),
away_shots = as.integer(away[6]),
away_shot_on_target = as.integer(away[7]),
away_deep = as.integer(away[8]),
away_PPDA = as.numeric(away[9]),
away_xPTS = as.numeric(away[10])

)

Expand Down
6 changes: 3 additions & 3 deletions man/understat_match_players.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

9 changes: 6 additions & 3 deletions man/understat_match_stats.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

16 changes: 16 additions & 0 deletions tests/testthat/test-understat.R
Original file line number Diff line number Diff line change
Expand Up @@ -47,3 +47,19 @@ test_that("understat_team_stats_breakdown() works", {
expect_equal(ncol(team_stats), 11)
expect_gt(nrow(team_stats), 0)
})

# Live smoke test: scrapes one known match and checks the shape of the result.
test_that("understat_match_players() works", {
  testthat::skip_on_cran()
  match_players <- understat_match_players(
    match_url = "https://understat.com/match/14789"
  )
  # expect_s3_class() checks inheritance directly and reports the actual
  # class on failure, unlike comparing against class() by hand.
  expect_s3_class(match_players, "data.frame")
  expect_equal(ncol(match_players), 22)
  expect_gt(nrow(match_players), 0)
})

# Live smoke test: scrapes one known match and checks the shape of the result.
test_that("understat_match_stats() works", {
  testthat::skip_on_cran()
  match_stats <- understat_match_stats(
    match_url = "https://understat.com/match/14789"
  )
  # expect_s3_class() checks inheritance directly and reports the actual
  # class on failure, unlike comparing against class() by hand.
  expect_s3_class(match_stats, "data.frame")
  expect_equal(ncol(match_stats), 20)
  expect_equal(nrow(match_stats), 1)
})

0 comments on commit be60541

Please sign in to comment.