From 68b17fab618f89703beb8da6ae7ca0cf753d5347 Mon Sep 17 00:00:00 2001 From: Saiem Gilani Date: Thu, 6 May 2021 18:18:23 -0400 Subject: [PATCH 1/5] take it back, take it way back switch from data/rds to pbp/rds data repo path --- R/cfb_pbp.R | 20 ++++++++++---------- vignettes/intro.Rmd | 10 +++++----- 2 files changed, 15 insertions(+), 15 deletions(-) diff --git a/R/cfb_pbp.R b/R/cfb_pbp.R index 0ce9f0a0..20c05fd4 100644 --- a/R/cfb_pbp.R +++ b/R/cfb_pbp.R @@ -22,8 +22,8 @@ load_cfb_pbp <- function(seasons, ..., qs = FALSE) { most_recent <- most_recent_season() - if (!all(seasons %in% 2014:most_recent)) { - usethis::ui_stop("Please pass valid seasons between 2014 and {most_recent}") + if (!all(seasons %in% 2002:most_recent)) { + usethis::ui_stop("Please pass valid seasons between 2002 and {most_recent}") } if (length(seasons) > 1 && is_sequential() && isFALSE(in_db)) { @@ -52,12 +52,12 @@ load_cfb_pbp <- function(seasons, ..., qs = FALSE) { cfb_single_season <- function(season, p, dbConnection = NULL, tablename = NULL, qs = FALSE) { if (isTRUE(qs)) { - .url <- glue::glue("https://github.com/saiemgilani/cfbfastR-data/blob/master/data/rds/pbp_players_pos_{season}.qs") + .url <- glue::glue("https://github.com/saiemgilani/cfbfastR-data/blob/master/pbp/rds/play_by_play_{season}.qs") pbp <- qs_from_url(.url) } if (isFALSE(qs)) { - .url <- glue::glue("https://raw.githubusercontent.com/saiemgilani/cfbfastR-data/master/data/rds/pbp_players_pos_{season}.rds") + .url <- glue::glue("https://raw.githubusercontent.com/saiemgilani/cfbfastR-data/master/pbp/rds/play_by_play_{season}.rds") con <- url(.url) pbp <- readRDS(con) close(con) @@ -74,7 +74,7 @@ cfb_single_season <- function(season, p, dbConnection = NULL, tablename = NULL, # load games file load_games <- function(){ - .url <- "https://raw.githubusercontent.com/saiemgilani/cfbfastR-data/master/data/games_in_data_repo.csv" + .url <- 
"https://raw.githubusercontent.com/saiemgilani/cfbfastR-data/master/pbp/cfb_games_in_data_repo.csv" con <- url(.url) dat <- utils::read.csv(con) # close(con) @@ -86,13 +86,13 @@ load_games <- function(){ #' @title #' **Update or create a cfbfastR play-by-play database** #' @description `update_cfb_db()` updates or creates a database with `cfbfastR` -#' play by play data of all completed games since 2014. +#' play by play data of all completed and available games since 2002. #' #' @details This function creates and updates a data table with the name `tblname` #' within a SQLite database (other drivers via `db_connection`) located in #' `dbdir` and named `dbname`. #' The data table combines all play by play data for every available game back -#' to the 2014 season and adds the most recent completed games as soon as they +#' to the 2002 season and adds the most recent completed games as soon as they #' are available for `cfbfastR`. #' #' The argument `force_rebuild` is of hybrid type. It can rebuild the play @@ -165,8 +165,8 @@ update_cfb_db <- function(dbdir = ".", # get completed games using Lee's file (thanks Lee!) 
user_message("Checking for missing completed games...", "todo") completed_games <- load_games() %>% - # completed games since 2014, excluding the broken games - dplyr::filter(.data$season >= 2014) %>% + # completed games since 2002, excluding the broken games + dplyr::filter(.data$season >= 2002) %>% dplyr::arrange(.data$week) %>% dplyr::pull(.data$game_id) @@ -202,7 +202,7 @@ update_cfb_db <- function(dbdir = ".", build_cfb_db <- function(tblname = "cfbfastR_pbp", db_conn, rebuild = FALSE, show_message = TRUE) { valid_seasons <- load_games() %>% - dplyr::filter(.data$season >= 2014) %>% + dplyr::filter(.data$season >= 2002) %>% dplyr::group_by(.data$season) %>% dplyr::summarise() %>% dplyr::ungroup() diff --git a/vignettes/intro.Rmd b/vignettes/intro.Rmd index 3a0a84d1..3c03776c 100644 --- a/vignettes/intro.Rmd +++ b/vignettes/intro.Rmd @@ -180,14 +180,14 @@ year_split19 = lapply(year_split, function(x) { pbp_2019 = bind_rows(year_split19) ``` -## **The fastR way**: `load_cfb_pbp()` (7 seasons, \~1-1.5 minutes `r emo::ji("flame")`) +## **The fastR way**: `load_cfb_pbp()` (19 seasons, \~1-1.5 minutes `r emo::ji("flame")`) -We are going to load in data for seasons 2014-2020, it'll take between 45-90 seconds to run. +We are going to load in data for seasons 2002-2020, it'll take between 120-180 seconds to run. -```{r load_2014_2020, warning = FALSE} +```{r load_2002_2020, warning = FALSE} tictoc::tic() pbp <- data.frame() -seasons <- 2014:2020 +seasons <- 2002:2020 progressr::with_progress({ future::plan("multisession") pbp <- cfbfastR::load_cfb_pbp(seasons) @@ -208,4 +208,4 @@ So there are three basic ids within each game, - the id for the drive (`drive_id`), - the id for the play (`id_play` or `play_id` depending on which data set you are looking at). -These are useful for all kinds of grouping, joining and sorting tasks. 
The columns `pos_team` and `def_pos_team` are essentially your offense and defense (the main difference is kickoffs, the team receiving the kickoff is the `pos_team`) for the play/drive. From there you have the typical descriptions, play types and yardage columns. Beyond that, you will see the origin of why this package came to be, building expected points and win probability metrics for in-game valuation of plays. +These are useful for all kinds of grouping, joining and sorting tasks. The columns `pos_team` and `def_pos_team` are essentially your offense and defense (the main difference is kickoffs, the team receiving the kickoff is the `pos_team`) for the play/drive. From there you have the typical descriptions, play types and yardage columns. Beyond that, you will see the origin of why this package came to be, building expected points and win probability metrics for in-game valuation of plays. From ce26c759c35e9acb4e0b93be88757661e06fd52e Mon Sep 17 00:00:00 2001 From: Saiem Gilani Date: Fri, 7 May 2021 18:13:21 -0400 Subject: [PATCH 2/5] fix badges, digits --- R/cfb_pbp.R | 2 ++ README.Rmd | 10 ++++------ README.md | 9 ++++----- 3 files changed, 10 insertions(+), 11 deletions(-) diff --git a/R/cfb_pbp.R b/R/cfb_pbp.R index 20c05fd4..a4ea33ef 100644 --- a/R/cfb_pbp.R +++ b/R/cfb_pbp.R @@ -12,6 +12,8 @@ NULL #' @param qs Wheter to use the function [qs::qdeserialize()] for more efficient loading. #' @export load_cfb_pbp <- function(seasons, ..., qs = FALSE) { + options(stringsAsFactors = FALSE) + options(scipen = 999) dots <- rlang::dots_list(...) 
if (all(c("dbConnection", "tablename") %in% names(dots))) in_db <- TRUE else in_db <- FALSE diff --git a/README.Rmd b/README.Rmd index 37308ff1..5cd46c44 100644 --- a/README.Rmd +++ b/README.Rmd @@ -46,13 +46,11 @@ x2 <- closed_tbl %>% - -![Lifecycle:maturing](https://img.shields.io/badge/lifecycle-maturing-blue.svg?style=for-the-badge&logo=github) -![R-CMD-check](https://img.shields.io/github/workflow/status/saiemgilani/cfbfastr/R-CMD-check?label=R-CMD-Check&logo=R&logoColor=blue&style=for-the-badge) -![Contributors](https://img.shields.io/github/contributors/saiemgilani/cfbfastR?style=for-the-badge) -![Version-Number](https://img.shields.io/github/r-package/v/saiemgilani/cfbfastr?label=cfbfastR&logo=R&style=for-the-badge) [![Twitter Follow](https://img.shields.io/twitter/follow/cfbfastR?color=blue&label=%40cfbfastR&logo=twitter&style=for-the-badge)](https://twitter.com/cfbfastR) - +[![Version-Number](https://img.shields.io/github/r-package/v/saiemgilani/cfbfastr?label=cfbfastR&logo=R&style=for-the-badge)](https://github.com/saiemgilani/cfbfastR/) +[![Lifecycle:maturing](https://img.shields.io/badge/lifecycle-maturing-blue.svg?style=for-the-badge&logo=github)](https://github.com/saiemgilani/cfbfastR/) +[![R-CMD-check](https://img.shields.io/github/workflow/status/saiemgilani/cfbfastr/R-CMD-check?label=R-CMD-Check&logo=R&logoColor=blue&style=for-the-badge)](https://github.com/saiemgilani/cfbfastR/actions/workflows/R-CMD-check.yaml) +[![Contributors](https://img.shields.io/github/contributors/saiemgilani/cfbfastR?style=for-the-badge)](https://github.com/saiemgilani/cfbfastR/graphs/contributors) The goal of [**```cfbfastR```**](https://saiemgilani.github.io/cfbfastR/) is to provide the community with an R package for working with CFB data. It is an R API wrapper around [https://collegefootballdata.com/](https://collegefootballdata.com/). 
Beyond data aggregation and tidying ease, one of the multitude of services that [**```cfbfastR```**](https://saiemgilani.github.io/cfbfastR/) provides is for benchmarking open-source expected points and win probability metrics. diff --git a/README.md b/README.md index 879821ea..21eaf415 100644 --- a/README.md +++ b/README.md @@ -3,13 +3,12 @@ -![Lifecycle:maturing](https://img.shields.io/badge/lifecycle-maturing-blue.svg?style=for-the-badge&logo=github) -![R-CMD-check](https://img.shields.io/github/workflow/status/saiemgilani/cfbfastr/R-CMD-check?label=R-CMD-Check&logo=R&logoColor=blue&style=for-the-badge) -![Contributors](https://img.shields.io/github/contributors/saiemgilani/cfbfastR?style=for-the-badge) -![Version-Number](https://img.shields.io/github/r-package/v/saiemgilani/cfbfastr?label=cfbfastR&logo=R&style=for-the-badge) [![Twitter Follow](https://img.shields.io/twitter/follow/cfbfastR?color=blue&label=%40cfbfastR&logo=twitter&style=for-the-badge)](https://twitter.com/cfbfastR) - +[![Version-Number](https://img.shields.io/github/r-package/v/saiemgilani/cfbfastr?label=cfbfastR&logo=R&style=for-the-badge)](https://github.com/saiemgilani/cfbfastR/) +[![Lifecycle:maturing](https://img.shields.io/badge/lifecycle-maturing-blue.svg?style=for-the-badge&logo=github)](https://github.com/saiemgilani/cfbfastR/) +[![R-CMD-check](https://img.shields.io/github/workflow/status/saiemgilani/cfbfastr/R-CMD-check?label=R-CMD-Check&logo=R&logoColor=blue&style=for-the-badge)](https://github.com/saiemgilani/cfbfastR/actions/workflows/R-CMD-check.yaml) +[![Contributors](https://img.shields.io/github/contributors/saiemgilani/cfbfastR?style=for-the-badge)](https://github.com/saiemgilani/cfbfastR/graphs/contributors) The goal of [**`cfbfastR`**](https://saiemgilani.github.io/cfbfastR/) is From 6f63304ea0b697525f1118b034cbd6be6cf00e13 Mon Sep 17 00:00:00 2001 From: Saiem Gilani Date: Sun, 27 Jun 2021 17:57:29 -0400 Subject: [PATCH 3/5] we on 2.0 --- DESCRIPTION | 2 +- 1 file changed, 
1 insertion(+), 1 deletion(-) diff --git a/DESCRIPTION b/DESCRIPTION index 59d97b54..81d322ed 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,6 +1,6 @@ Package: cfbfastR Title: Functions to Access College Football Play by Play Data -Version: 1.3.3 +Version: 2.0.0 Authors@R: c(person(given = "Saiem", family = "Gilani", From 904e7564d050fcefa39006b121cb45444ac9b4ea Mon Sep 17 00:00:00 2001 From: Saiem Gilani Date: Tue, 29 Jun 2021 12:12:43 -0400 Subject: [PATCH 4/5] we on 1.9.9 --- DESCRIPTION | 2 +- NAMESPACE | 256 ++++++++++----------- NEWS.md | 4 + R/cfb_pbp.R | 502 +++++++++++++++++++++--------------------- R/cfbd_games.R | 1 + README.Rmd | 14 +- _pkgdown.yml | 11 + man/cfbd_game_info.Rd | 1 + man/update_cfb_db.Rd | 126 +++++------ vignettes/intro.Rmd | 8 +- 10 files changed, 476 insertions(+), 449 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index 81d322ed..592ac299 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,6 +1,6 @@ Package: cfbfastR Title: Functions to Access College Football Play by Play Data -Version: 2.0.0 +Version: 1.9.9 Authors@R: c(person(given = "Saiem", family = "Gilani", diff --git a/NAMESPACE b/NAMESPACE index 36d83b8b..417a8810 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -1,128 +1,128 @@ -# Generated by roxygen2: do not edit by hand - -export(add_play_counts) -export(add_player_cols) -export(add_yardage) -export(cfbd_betting_lines) -export(cfbd_calendar) -export(cfbd_coaches) -export(cfbd_conferences) -export(cfbd_draft_picks) -export(cfbd_draft_positions) -export(cfbd_draft_teams) -export(cfbd_drives) -export(cfbd_game_box_advanced) -export(cfbd_game_info) -export(cfbd_game_media) -export(cfbd_game_player_stats) -export(cfbd_game_records) -export(cfbd_game_team_stats) -export(cfbd_key) -export(cfbd_metrics_ppa_games) -export(cfbd_metrics_ppa_players_games) -export(cfbd_metrics_ppa_players_season) -export(cfbd_metrics_ppa_predicted) -export(cfbd_metrics_ppa_teams) -export(cfbd_metrics_wp) -export(cfbd_metrics_wp_pregame) 
-export(cfbd_pbp_data) -export(cfbd_play_stats_player) -export(cfbd_play_stats_types) -export(cfbd_play_types) -export(cfbd_player_info) -export(cfbd_player_returning) -export(cfbd_player_usage) -export(cfbd_plays) -export(cfbd_rankings) -export(cfbd_ratings_sp) -export(cfbd_ratings_sp_conference) -export(cfbd_ratings_srs) -export(cfbd_recruiting_player) -export(cfbd_recruiting_position) -export(cfbd_recruiting_team) -export(cfbd_stats_categories) -export(cfbd_stats_game_advanced) -export(cfbd_stats_season_advanced) -export(cfbd_stats_season_player) -export(cfbd_stats_season_team) -export(cfbd_team_info) -export(cfbd_team_matchup) -export(cfbd_team_matchup_records) -export(cfbd_team_roster) -export(cfbd_team_talent) -export(cfbd_venues) -export(clean_drive_dat) -export(clean_drive_info) -export(clean_pbp_dat) -export(create_epa) -export(create_wpa_naive) -export(epa_fg_probs) -export(espn_metrics_wp) -export(has_cfbd_key) -export(load_cfb_pbp) -export(penalty_detection) -export(prep_epa_df_after) -export(update_cfb_db) -export(wpa_calcs_naive) -import(dplyr) -import(purrr) -import(stringr) -import(tidyr) -importFrom(assertthat,assert_that) -importFrom(dplyr,arrange) -importFrom(dplyr,as_tibble) -importFrom(dplyr,between) -importFrom(dplyr,case_when) -importFrom(dplyr,filter) -importFrom(dplyr,group_by) -importFrom(dplyr,if_else) -importFrom(dplyr,lag) -importFrom(dplyr,lead) -importFrom(dplyr,left_join) -importFrom(dplyr,mutate) -importFrom(dplyr,mutate_at) -importFrom(dplyr,n) -importFrom(dplyr,rename) -importFrom(dplyr,row_number) -importFrom(dplyr,select) -importFrom(dplyr,setdiff) -importFrom(dplyr,ungroup) -importFrom(glue,glue) -importFrom(httr,GET) -importFrom(httr,RETRY) -importFrom(httr,status_code) -importFrom(janitor,clean_names) -importFrom(jsonlite,fromJSON) -importFrom(magrittr,"%>%") -importFrom(mgcv,bam) -importFrom(mgcv,predict.bam) -importFrom(nnet,multinom) -importFrom(purrr,map) -importFrom(purrr,map_dfr) -importFrom(purrr,map_if) 
-importFrom(purrr,pluck) -importFrom(purrr,quietly) -importFrom(purrr,set_names) -importFrom(rlang,.data) -importFrom(stats,na.omit) -importFrom(stats,predict) -importFrom(stringi,stri_extract_first_regex) -importFrom(stringr,str_detect) -importFrom(stringr,str_extract) -importFrom(stringr,str_length) -importFrom(stringr,str_remove) -importFrom(stringr,str_sub) -importFrom(stringr,str_trim) -importFrom(tibble,enframe) -importFrom(tidyr,everything) -importFrom(tidyr,fill) -importFrom(tidyr,pivot_wider) -importFrom(tidyr,unnest) -importFrom(tidyr,unnest_wider) -importFrom(utils,URLdecode) -importFrom(utils,URLencode) -importFrom(utils,data) -importFrom(utils,globalVariables) -importFrom(utils,installed.packages) -importFrom(utils,packageDescription) -importFrom(utils,packageVersion) +# Generated by roxygen2: do not edit by hand + +export(add_play_counts) +export(add_player_cols) +export(add_yardage) +export(cfbd_betting_lines) +export(cfbd_calendar) +export(cfbd_coaches) +export(cfbd_conferences) +export(cfbd_draft_picks) +export(cfbd_draft_positions) +export(cfbd_draft_teams) +export(cfbd_drives) +export(cfbd_game_box_advanced) +export(cfbd_game_info) +export(cfbd_game_media) +export(cfbd_game_player_stats) +export(cfbd_game_records) +export(cfbd_game_team_stats) +export(cfbd_key) +export(cfbd_metrics_ppa_games) +export(cfbd_metrics_ppa_players_games) +export(cfbd_metrics_ppa_players_season) +export(cfbd_metrics_ppa_predicted) +export(cfbd_metrics_ppa_teams) +export(cfbd_metrics_wp) +export(cfbd_metrics_wp_pregame) +export(cfbd_pbp_data) +export(cfbd_play_stats_player) +export(cfbd_play_stats_types) +export(cfbd_play_types) +export(cfbd_player_info) +export(cfbd_player_returning) +export(cfbd_player_usage) +export(cfbd_plays) +export(cfbd_rankings) +export(cfbd_ratings_sp) +export(cfbd_ratings_sp_conference) +export(cfbd_ratings_srs) +export(cfbd_recruiting_player) +export(cfbd_recruiting_position) +export(cfbd_recruiting_team) +export(cfbd_stats_categories) 
+export(cfbd_stats_game_advanced) +export(cfbd_stats_season_advanced) +export(cfbd_stats_season_player) +export(cfbd_stats_season_team) +export(cfbd_team_info) +export(cfbd_team_matchup) +export(cfbd_team_matchup_records) +export(cfbd_team_roster) +export(cfbd_team_talent) +export(cfbd_venues) +export(clean_drive_dat) +export(clean_drive_info) +export(clean_pbp_dat) +export(create_epa) +export(create_wpa_naive) +export(epa_fg_probs) +export(espn_metrics_wp) +export(has_cfbd_key) +export(load_cfb_pbp) +export(penalty_detection) +export(prep_epa_df_after) +export(update_cfb_db) +export(wpa_calcs_naive) +import(dplyr) +import(purrr) +import(stringr) +import(tidyr) +importFrom(assertthat,assert_that) +importFrom(dplyr,arrange) +importFrom(dplyr,as_tibble) +importFrom(dplyr,between) +importFrom(dplyr,case_when) +importFrom(dplyr,filter) +importFrom(dplyr,group_by) +importFrom(dplyr,if_else) +importFrom(dplyr,lag) +importFrom(dplyr,lead) +importFrom(dplyr,left_join) +importFrom(dplyr,mutate) +importFrom(dplyr,mutate_at) +importFrom(dplyr,n) +importFrom(dplyr,rename) +importFrom(dplyr,row_number) +importFrom(dplyr,select) +importFrom(dplyr,setdiff) +importFrom(dplyr,ungroup) +importFrom(glue,glue) +importFrom(httr,GET) +importFrom(httr,RETRY) +importFrom(httr,status_code) +importFrom(janitor,clean_names) +importFrom(jsonlite,fromJSON) +importFrom(magrittr,"%>%") +importFrom(mgcv,bam) +importFrom(mgcv,predict.bam) +importFrom(nnet,multinom) +importFrom(purrr,map) +importFrom(purrr,map_dfr) +importFrom(purrr,map_if) +importFrom(purrr,pluck) +importFrom(purrr,quietly) +importFrom(purrr,set_names) +importFrom(rlang,.data) +importFrom(stats,na.omit) +importFrom(stats,predict) +importFrom(stringi,stri_extract_first_regex) +importFrom(stringr,str_detect) +importFrom(stringr,str_extract) +importFrom(stringr,str_length) +importFrom(stringr,str_remove) +importFrom(stringr,str_sub) +importFrom(stringr,str_trim) +importFrom(tibble,enframe) +importFrom(tidyr,everything) 
+importFrom(tidyr,fill) +importFrom(tidyr,pivot_wider) +importFrom(tidyr,unnest) +importFrom(tidyr,unnest_wider) +importFrom(utils,URLdecode) +importFrom(utils,URLencode) +importFrom(utils,data) +importFrom(utils,globalVariables) +importFrom(utils,installed.packages) +importFrom(utils,packageDescription) +importFrom(utils,packageVersion) diff --git a/NEWS.md b/NEWS.md index cc76fef6..0257ec6a 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,3 +1,7 @@ +# **cfbfastR v1.9.9** +### Expected points data back to 2003 added to the package function [```load_cfb_pbp()```](https://saiemgilani.github.io/cfbfastR/reference/load_cfb_pbp.html) and [```update_cfb_db()```](https://saiemgilani.github.io/cfbfastR/reference/update_cfb_db.html) +- There are a few known errors with the calculations that need to be looked into further. One specific one identified by Brendan Farrell noting that short yardage touchdowns for pre-2008 data were often negative EPA plays. This is either a duplication issue or a coding error. + # **cfbfastR v1.3.3** ### Hotfix [```cfbd_game_player_stats()```](https://saiemgilani.github.io/cfbfastR/reference/cfbd_game_player_stats.html) diff --git a/R/cfb_pbp.R b/R/cfb_pbp.R index a4ea33ef..34bc480f 100644 --- a/R/cfb_pbp.R +++ b/R/cfb_pbp.R @@ -1,251 +1,251 @@ -#' **Load cfbfastR play-by-play** -#' @name load_cfb_pbp -NULL -#' @title -#' **Load cleaned play-by-play from the data repo** -#' @rdname load_cfb_pbp -#' @description helper that loads multiple seasons from the data repo either into memory -#' or writes it into a db using some forwarded arguments in the dots -#' @param seasons A vector of 4-digit years associated with given College Football seasons. -#' @param ... Additional arguments passed to an underlying function that writes -#' the season data into a database (used by \code{\link[=update_cfb_db]{update_cfb_db()}}). -#' @param qs Wheter to use the function [qs::qdeserialize()] for more efficient loading. 
-#' @export -load_cfb_pbp <- function(seasons, ..., qs = FALSE) { - options(stringsAsFactors = FALSE) - options(scipen = 999) - dots <- rlang::dots_list(...) - - if (all(c("dbConnection", "tablename") %in% names(dots))) in_db <- TRUE else in_db <- FALSE - - if (isTRUE(qs) && !is_installed("qs")) { - usethis::ui_stop("Package {usethis::ui_value('qs')} required for argument {usethis::ui_value('qs = TRUE')}. Please install it.") - } - - most_recent <- most_recent_season() - - if (!all(seasons %in% 2002:most_recent)) { - usethis::ui_stop("Please pass valid seasons between 2002 and {most_recent}") - } - - if (length(seasons) > 1 && is_sequential() && isFALSE(in_db)) { - usethis::ui_info(c( - "It is recommended to use parallel processing when trying to load multiple seasons.", - "Please consider running {usethis::ui_code('future::plan(\"multisession\")')}!", - "Will go on sequentially..." - )) - } - - - p <- progressr::progressor(along = seasons) - - if (isFALSE(in_db)) { - out <- furrr::future_map_dfr(seasons, cfb_single_season, p = p, qs = qs) - } - - if (isTRUE(in_db)) { - purrr::walk(seasons, cfb_single_season, p, ..., qs = qs) - out <- NULL - } - - return(out) -} - -cfb_single_season <- function(season, p, dbConnection = NULL, tablename = NULL, qs = FALSE) { - if (isTRUE(qs)) { - - .url <- glue::glue("https://github.com/saiemgilani/cfbfastR-data/blob/master/pbp/rds/play_by_play_{season}.qs") - pbp <- qs_from_url(.url) - - } - if (isFALSE(qs)) { - .url <- glue::glue("https://raw.githubusercontent.com/saiemgilani/cfbfastR-data/master/pbp/rds/play_by_play_{season}.rds") - con <- url(.url) - pbp <- readRDS(con) - close(con) - } - if (!is.null(dbConnection) && !is.null(tablename)) { - DBI::dbWriteTable(dbConnection, tablename, pbp, append = TRUE) - out <- NULL - } else { - out <- pbp - } - p(sprintf("season=%g", season)) - return(out) -} - -# load games file -load_games <- function(){ - .url <- 
"https://raw.githubusercontent.com/saiemgilani/cfbfastR-data/master/pbp/cfb_games_in_data_repo.csv" - con <- url(.url) - dat <- utils::read.csv(con) - # close(con) - return (dat) -} - -#' @name update_cfb_db -#' @aliases update_cfb_db cfb_db cfb database cfb_pbp_db -#' @title -#' **Update or create a cfbfastR play-by-play database** -#' @description `update_cfb_db()` updates or creates a database with `cfbfastR` -#' play by play data of all completed and available games since 2002. -#' -#' @details This function creates and updates a data table with the name `tblname` -#' within a SQLite database (other drivers via `db_connection`) located in -#' `dbdir` and named `dbname`. -#' The data table combines all play by play data for every available game back -#' to the 2002 season and adds the most recent completed games as soon as they -#' are available for `cfbfastR`. -#' -#' The argument `force_rebuild` is of hybrid type. It can rebuild the play -#' by play data table either for the whole cfbfastR era (with `force_rebuild = TRUE`) -#' or just for specified seasons (e.g. `force_rebuild = c(2019, 2020)`). -#' Please note the following behavior: -#' \itemize{ -#' \item{`force_rebuild = TRUE`}{: The data table with the name `tblname` -#' will be removed completely and rebuilt from scratch. This is helpful when -#' new columns are added during the Off-Season.} -#' \item{`force_rebuild = c(2019, 2020)`}{: The data table with the name `tblname` -#' will be preserved and only rows from the 2019 and 2020 seasons will be -#' deleted and re-added. This is intended to be used for ongoing seasons because -#' ESPN's data provider can make changes to the underlying data during the week.} -#' } -#' -#' The parameter `db_connection` is intended for advanced users who want -#' to use other DBI drivers, such as MariaDB, Postgres or odbc. 
Please note that -#' the arguments `dbdir` and `dbname` are dropped in case a `db_connection` -#' is provided but the argument `tblname` will still be used to write the -#' data table into the database. -#' -#' @param dbdir Directory in which the database is or shall be located -#' @param dbname File name of an existing or desired SQLite database within `dbdir` -#' @param tblname The name of the play by play data table within the database -#' @param force_rebuild Hybrid parameter (logical or numeric) to rebuild parts -#' of or the complete play by play data table within the database (please see details for further information) -#' @param db_connection A `DBIConnection` object, as returned by -#' [DBI::dbConnect()] (please see details for further information) -#' @export -update_cfb_db <- function(dbdir = ".", - dbname = "cfb_pbp_db", - tblname = "cfbfastR_pbp", - force_rebuild = FALSE, - db_connection = NULL) { - - # rule_header("Update cfbfastR Play-by-Play Database") - - if (!is_installed("DBI") | !is_installed("purrr") | - (!is_installed("RSQLite") & is.null(db_connection))) { - usethis::ui_stop("{my_time()} | Packages {usethis::ui_value('DBI')}, {usethis::ui_value('RSQLite')} and {usethis::ui_value('purrr')} required for database communication. Please install them.") - } - - if (any(force_rebuild == "NEW")) { - usethis::ui_stop("{my_time()} | The argument {usethis::ui_value('force_rebuild = NEW')} is only for internal usage!") - } - - if (!(is.logical(force_rebuild) | is.numeric(force_rebuild))) { - usethis::ui_stop("{my_time()} | The argument {usethis::ui_value('force_rebuild')} has to be either logical or numeric!") - } - - if (!dir.exists(dbdir) & is.null(db_connection)) { - usethis::ui_oops("{my_time()} | Directory {usethis::ui_path(dbdir)} doesn't exist yet. 
Try creating...") - dir.create(dbdir) - } - - if (is.null(db_connection)) { - connection <- DBI::dbConnect(RSQLite::SQLite(), glue::glue("{dbdir}/{dbname}")) - } else { - connection <- db_connection - } - - # create db if it doesn't exist or user forces rebuild - if (!DBI::dbExistsTable(connection, tblname)) { - build_cfb_db(tblname, connection, rebuild = "NEW") - } else if (DBI::dbExistsTable(connection, tblname) & all(force_rebuild != FALSE)) { - build_cfb_db(tblname, connection, rebuild = force_rebuild) - } - - # get completed games using Lee's file (thanks Lee!) - user_message("Checking for missing completed games...", "todo") - completed_games <- load_games() %>% - # completed games since 2002, excluding the broken games - dplyr::filter(.data$season >= 2002) %>% - dplyr::arrange(.data$week) %>% - dplyr::pull(.data$game_id) - - # function below - missing <- get_missing_cfb_games(completed_games, connection, tblname) - - # rebuild db if number of missing games is too large - if(length(missing) > 16) {# limit set to >16 to make sure this doesn't get triggered on gameday (e.g. week 17) - # message("The number of missing games is so large that rebuilding the database is more efficient.") - build_cfb_db(tblname, connection, show_message = FALSE, rebuild = as.numeric(unique(stringr::str_sub(missing, 1, 4)))) - missing <- get_missing_cfb_games(completed_games, connection, tblname) - } - - # # if there's missing games, scrape and write to db - # if (length(missing) > 0) { - # new_pbp <- build_cfbfastR_pbp(missing, rules = FALSE) - # - # if (nrow(new_pbp) == 0) { - # user_message("Raw data of new games are not yet ready. 
Please try again in about 10 minutes.", "oops") - # } else { - # user_message("Appending new data to database...", "todo") - # DBI::dbWriteTable(connection, tblname, new_pbp, append = TRUE) - # } - # } - - message_completed("Database update completed", in_builder = TRUE) - usethis::ui_info("{my_time()} | Path to your db: {usethis::ui_path(DBI::dbGetInfo(connection)$dbname)}") - if (is.null(db_connection)) DBI::dbDisconnect(connection) - # rule_footer("DONE") -} - -# this is a helper function to build cfbfastR database from Scratch -build_cfb_db <- function(tblname = "cfbfastR_pbp", db_conn, rebuild = FALSE, show_message = TRUE) { - - valid_seasons <- load_games() %>% - dplyr::filter(.data$season >= 2002) %>% - dplyr::group_by(.data$season) %>% - dplyr::summarise() %>% - dplyr::ungroup() - - if (all(rebuild == TRUE)) { - usethis::ui_todo("{my_time()} | Purging the complete data table {usethis::ui_value(tblname)} in your connected database...") - DBI::dbRemoveTable(db_conn, tblname) - seasons <- valid_seasons %>% dplyr::pull("season") - usethis::ui_todo("{my_time()} | Starting download of {length(seasons)} seasons between {min(seasons)} and {max(seasons)}...") - } else if (is.numeric(rebuild) & all(rebuild %in% valid_seasons$season)) { - string <- paste0(rebuild, collapse = ", ") - if (show_message){usethis::ui_todo("{my_time()} | Purging {string} season(s) from the data table {usethis::ui_value(tblname)} in your connected database...")} - DBI::dbExecute(db_conn, glue::glue_sql("DELETE FROM {`tblname`} WHERE season IN ({vals*})", vals = rebuild, .con = db_conn)) - seasons <- valid_seasons %>% dplyr::filter(.data$season %in% rebuild) %>% dplyr::pull("season") - usethis::ui_todo("{my_time()} | Starting download of the {string} season(s)...") - } else if (all(rebuild == "NEW")) { - usethis::ui_info("{my_time()} | Can't find the data table {usethis::ui_value(tblname)} in your database. 
Will load the play by play data from scratch.") - seasons <- valid_seasons %>% dplyr::pull("season") - usethis::ui_todo("{my_time()} | Starting download of {length(seasons)} seasons between {min(seasons)} and {max(seasons)}...") - } else { - seasons <- NULL - usethis::ui_oops("{my_time()} | At least one invalid value passed to argument {usethis::ui_code('force_rebuild')}. Please try again with valid input.") - } - - if (!is.null(seasons)) { - # this function lives in R/utils.R - load_cfb_pbp(seasons, dbConnection = db_conn, tablename = tblname, qs = FALSE) - } -} - -# this is a helper function to check a list of completed games -# against the games that exist in a database connection -get_missing_cfb_games <- function(completed_games, dbConnection, tablename) { - db_ids <- dplyr::tbl(dbConnection, tablename) %>% - dplyr::select("game_id") %>% - dplyr::distinct() %>% - dplyr::collect() %>% - dplyr::pull("game_id") - - need_scrape <- completed_games[!completed_games %in% db_ids] - - usethis::ui_info("{my_time()} | You have {length(db_ids)} games and are missing {length(need_scrape)}.") - return(need_scrape) -} +#' **Load cfbfastR play-by-play** +#' @name load_cfb_pbp +NULL +#' @title +#' **Load cleaned play-by-play from the data repo** +#' @rdname load_cfb_pbp +#' @description helper that loads multiple seasons from the data repo either into memory +#' or writes it into a db using some forwarded arguments in the dots +#' @param seasons A vector of 4-digit years associated with given College Football seasons. +#' @param ... Additional arguments passed to an underlying function that writes +#' the season data into a database (used by \code{\link[=update_cfb_db]{update_cfb_db()}}). +#' @param qs Wheter to use the function [qs::qdeserialize()] for more efficient loading. +#' @export +load_cfb_pbp <- function(seasons, ..., qs = FALSE) { + options(stringsAsFactors = FALSE) + options(scipen = 999) + dots <- rlang::dots_list(...) 
+ + if (all(c("dbConnection", "tablename") %in% names(dots))) in_db <- TRUE else in_db <- FALSE + + if (isTRUE(qs) && !is_installed("qs")) { + usethis::ui_stop("Package {usethis::ui_value('qs')} required for argument {usethis::ui_value('qs = TRUE')}. Please install it.") + } + + most_recent <- most_recent_season() + + if (!all(seasons %in% 2003:most_recent)) { + usethis::ui_stop("Please pass valid seasons between 2003 and {most_recent}") + } + + if (length(seasons) > 1 && is_sequential() && isFALSE(in_db)) { + usethis::ui_info(c( + "It is recommended to use parallel processing when trying to load multiple seasons.", + "Please consider running {usethis::ui_code('future::plan(\"multisession\")')}!", + "Will go on sequentially..." + )) + } + + + p <- progressr::progressor(along = seasons) + + if (isFALSE(in_db)) { + out <- furrr::future_map_dfr(rev(seasons), cfb_single_season, p = p, qs = qs) + } + + if (isTRUE(in_db)) { + purrr::walk(seasons, cfb_single_season, p, ..., qs = qs) + out <- NULL + } + + return(out) +} + +cfb_single_season <- function(season, p, dbConnection = NULL, tablename = NULL, qs = FALSE) { + if (isTRUE(qs)) { + + .url <- glue::glue("https://github.com/saiemgilani/cfbfastR-data/blob/master/pbp/rds/play_by_play_{season}.qs") + pbp <- qs_from_url(.url) + + } + if (isFALSE(qs)) { + .url <- glue::glue("https://raw.githubusercontent.com/saiemgilani/cfbfastR-data/master/pbp/rds/play_by_play_{season}.rds") + con <- url(.url) + pbp <- readRDS(con) + close(con) + } + if (!is.null(dbConnection) && !is.null(tablename)) { + DBI::dbWriteTable(dbConnection, tablename, pbp, append = TRUE) + out <- NULL + } else { + out <- pbp + } + p(sprintf("season=%g", season)) + return(out) +} + +# load games file +load_games <- function(){ + .url <- "https://raw.githubusercontent.com/saiemgilani/cfbfastR-data/master/pbp/cfb_games_in_data_repo.csv" + con <- url(.url) + dat <- utils::read.csv(con) + # close(con) + return (dat) +} + +#' @name update_cfb_db +#' @aliases 
update_cfb_db cfb_db cfb database cfb_pbp_db +#' @title +#' **Update or create a cfbfastR play-by-play database** +#' @description `update_cfb_db()` updates or creates a database with `cfbfastR` +#' play by play data of all completed and available games since 2003. +#' +#' @details This function creates and updates a data table with the name `tblname` +#' within a SQLite database (other drivers via `db_connection`) located in +#' `dbdir` and named `dbname`. +#' The data table combines all play by play data for every available game back +#' to the 2003 season and adds the most recent completed games as soon as they +#' are available for `cfbfastR`. +#' +#' The argument `force_rebuild` is of hybrid type. It can rebuild the play +#' by play data table either for the whole cfbfastR era (with `force_rebuild = TRUE`) +#' or just for specified seasons (e.g. `force_rebuild = c(2019, 2020)`). +#' Please note the following behavior: +#' \itemize{ +#' \item{`force_rebuild = TRUE`}{: The data table with the name `tblname` +#' will be removed completely and rebuilt from scratch. This is helpful when +#' new columns are added during the Off-Season.} +#' \item{`force_rebuild = c(2019, 2020)`}{: The data table with the name `tblname` +#' will be preserved and only rows from the 2019 and 2020 seasons will be +#' deleted and re-added. This is intended to be used for ongoing seasons because +#' ESPN's data provider can make changes to the underlying data during the week.} +#' } +#' +#' The parameter `db_connection` is intended for advanced users who want +#' to use other DBI drivers, such as MariaDB, Postgres or odbc. Please note that +#' the arguments `dbdir` and `dbname` are dropped in case a `db_connection` +#' is provided but the argument `tblname` will still be used to write the +#' data table into the database. 
+#' +#' @param dbdir Directory in which the database is or shall be located +#' @param dbname File name of an existing or desired SQLite database within `dbdir` +#' @param tblname The name of the play by play data table within the database +#' @param force_rebuild Hybrid parameter (logical or numeric) to rebuild parts +#' of or the complete play by play data table within the database (please see details for further information) +#' @param db_connection A `DBIConnection` object, as returned by +#' [DBI::dbConnect()] (please see details for further information) +#' @export +update_cfb_db <- function(dbdir = ".", + dbname = "cfb_pbp_db", + tblname = "cfbfastR_pbp", + force_rebuild = FALSE, + db_connection = NULL) { + + # rule_header("Update cfbfastR Play-by-Play Database") + + if (!is_installed("DBI") | !is_installed("purrr") | + (!is_installed("RSQLite") & is.null(db_connection))) { + usethis::ui_stop("{my_time()} | Packages {usethis::ui_value('DBI')}, {usethis::ui_value('RSQLite')} and {usethis::ui_value('purrr')} required for database communication. Please install them.") + } + + if (any(force_rebuild == "NEW")) { + usethis::ui_stop("{my_time()} | The argument {usethis::ui_value('force_rebuild = NEW')} is only for internal usage!") + } + + if (!(is.logical(force_rebuild) | is.numeric(force_rebuild))) { + usethis::ui_stop("{my_time()} | The argument {usethis::ui_value('force_rebuild')} has to be either logical or numeric!") + } + + if (!dir.exists(dbdir) & is.null(db_connection)) { + usethis::ui_oops("{my_time()} | Directory {usethis::ui_path(dbdir)} doesn't exist yet. 
Try creating...") + dir.create(dbdir) + } + + if (is.null(db_connection)) { + connection <- DBI::dbConnect(RSQLite::SQLite(), glue::glue("{dbdir}/{dbname}")) + } else { + connection <- db_connection + } + + # create db if it doesn't exist or user forces rebuild + if (!DBI::dbExistsTable(connection, tblname)) { + build_cfb_db(tblname, connection, rebuild = "NEW") + } else if (DBI::dbExistsTable(connection, tblname) & all(force_rebuild != FALSE)) { + build_cfb_db(tblname, connection, rebuild = force_rebuild) + } + + # get completed games using Lee's file (thanks Lee!) + user_message("Checking for missing completed games...", "todo") + completed_games <- load_games() %>% + # completed games since 2003, excluding the broken games + dplyr::filter(.data$season >= 2003) %>% + dplyr::arrange(.data$week) %>% + dplyr::pull(.data$game_id) + + # function below + missing <- get_missing_cfb_games(completed_games, connection, tblname) + + # rebuild db if number of missing games is too large + if(length(missing) > 16) {# limit set to >16 to make sure this doesn't get triggered on gameday (e.g. week 17) + # message("The number of missing games is so large that rebuilding the database is more efficient.") + build_cfb_db(tblname, connection, show_message = FALSE, rebuild = as.numeric(unique(stringr::str_sub(missing, 1, 4)))) + missing <- get_missing_cfb_games(completed_games, connection, tblname) + } + + # # if there's missing games, scrape and write to db + # if (length(missing) > 0) { + # new_pbp <- build_cfbfastR_pbp(missing, rules = FALSE) + # + # if (nrow(new_pbp) == 0) { + # user_message("Raw data of new games are not yet ready. 
Please try again in about 10 minutes.", "oops") + # } else { + # user_message("Appending new data to database...", "todo") + # DBI::dbWriteTable(connection, tblname, new_pbp, append = TRUE) + # } + # } + + message_completed("Database update completed", in_builder = TRUE) + usethis::ui_info("{my_time()} | Path to your db: {usethis::ui_path(DBI::dbGetInfo(connection)$dbname)}") + if (is.null(db_connection)) DBI::dbDisconnect(connection) + # rule_footer("DONE") +} + +# this is a helper function to build cfbfastR database from Scratch +build_cfb_db <- function(tblname = "cfbfastR_pbp", db_conn, rebuild = FALSE, show_message = TRUE) { + + valid_seasons <- load_games() %>% + dplyr::filter(.data$season >= 2003) %>% + dplyr::group_by(.data$season) %>% + dplyr::summarise() %>% + dplyr::ungroup() + + if (all(rebuild == TRUE)) { + usethis::ui_todo("{my_time()} | Purging the complete data table {usethis::ui_value(tblname)} in your connected database...") + DBI::dbRemoveTable(db_conn, tblname) + seasons <- valid_seasons %>% dplyr::pull("season") + usethis::ui_todo("{my_time()} | Starting download of {length(seasons)} seasons between {min(seasons)} and {max(seasons)}...") + } else if (is.numeric(rebuild) & all(rebuild %in% valid_seasons$season)) { + string <- paste0(rebuild, collapse = ", ") + if (show_message){usethis::ui_todo("{my_time()} | Purging {string} season(s) from the data table {usethis::ui_value(tblname)} in your connected database...")} + DBI::dbExecute(db_conn, glue::glue_sql("DELETE FROM {`tblname`} WHERE season IN ({vals*})", vals = rebuild, .con = db_conn)) + seasons <- valid_seasons %>% dplyr::filter(.data$season %in% rebuild) %>% dplyr::pull("season") + usethis::ui_todo("{my_time()} | Starting download of the {string} season(s)...") + } else if (all(rebuild == "NEW")) { + usethis::ui_info("{my_time()} | Can't find the data table {usethis::ui_value(tblname)} in your database. 
Will load the play by play data from scratch.") + seasons <- valid_seasons %>% dplyr::pull("season") + usethis::ui_todo("{my_time()} | Starting download of {length(seasons)} seasons between {min(seasons)} and {max(seasons)}...") + } else { + seasons <- NULL + usethis::ui_oops("{my_time()} | At least one invalid value passed to argument {usethis::ui_code('force_rebuild')}. Please try again with valid input.") + } + + if (!is.null(seasons)) { + # this function lives in R/utils.R + load_cfb_pbp(seasons, dbConnection = db_conn, tablename = tblname, qs = FALSE) + } +} + +# this is a helper function to check a list of completed games +# against the games that exist in a database connection +get_missing_cfb_games <- function(completed_games, dbConnection, tablename) { + db_ids <- dplyr::tbl(dbConnection, tablename) %>% + dplyr::select("game_id") %>% + dplyr::distinct() %>% + dplyr::collect() %>% + dplyr::pull("game_id") + + need_scrape <- completed_games[!completed_games %in% db_ids] + + usethis::ui_info("{my_time()} | You have {length(db_ids)} games and are missing {length(need_scrape)}.") + return(need_scrape) +} diff --git a/R/cfbd_games.R b/R/cfbd_games.R index d030f6d6..148a54ad 100644 --- a/R/cfbd_games.R +++ b/R/cfbd_games.R @@ -97,6 +97,7 @@ NULL #' \item{`away_points`: integer.}{Away team points.} #' \item{`away_post_win_prob`: character.}{Away team post-game win probability.} #' \item{`excitement_index`: character.}{Game excitement index.} +#' \item{`highlights`: character.}{Link to game highlights.} #' } #' @source \url{https://api.collegefootballdata.com/games} #' @keywords Game Info diff --git a/README.Rmd b/README.Rmd index f8241aa7..782e1685 100644 --- a/README.Rmd +++ b/README.Rmd @@ -80,14 +80,24 @@ devtools::install_github(repo = "saiemgilani/cfbfastR") [**Full News on Releases**](https://saiemgilani.github.io/cfbfastR/news/index.html) -# **cfbfastR v1.3.3** -### Hotfix 
[```cfbd_game_player_stats()```](https://saiemgilani.github.io/cfbfastR/reference/cfbd_game_player_stats.html) +# **cfbfastR v1.9.9** + +### Expected points and Win Probability data back to 2003 + +- Added to the package functions [```load_cfb_pbp()```](https://saiemgilani.github.io/cfbfastR/reference/load_cfb_pbp.html) and [```update_cfb_db()```](https://saiemgilani.github.io/cfbfastR/reference/update_cfb_db.html) + +- There are a few known errors with the calculations that need to be looked into further. One specific issue, identified by Brendan Farrell, is that short-yardage touchdowns in pre-2008 data were often negative EPA plays. This is either a duplication issue or a coding error. #
View more version news +## **cfbfastR v1.3.3** + +### Hotfix [```cfbd_game_player_stats()```](https://saiemgilani.github.io/cfbfastR/reference/cfbd_game_player_stats.html) + + ## **cfbfastR v1.3.2** ### Added ID linking to [```cfbd_recruiting_players()```](https://saiemgilani.github.io/cfbfastR/reference/cfbd_recruiting_player.html) diff --git a/_pkgdown.yml b/_pkgdown.yml index cf425dfd..1b57384f 100644 --- a/_pkgdown.yml +++ b/_pkgdown.yml @@ -53,6 +53,8 @@ home: href: articles/intro.html - text: cfbfastR stats landing page href: https://gameonpaper.com/cfb/ + - text: College Football Data API + href: https://collegefootballdata.com navbar: structure: @@ -70,6 +72,8 @@ navbar: icon: "fas fa-database" text: " Data" menu: + - text: "CFBData.com" + href: https://collegefootballdata.com - text: "Data Repository" href: https://github.com/saiemgilani/cfbfastR-data - text: "Raw Data Repository" @@ -176,12 +180,19 @@ navbar: menu: - text: SportsDataverse href: https://sportsdataverse.org/ + - text: R Packages - text: hoopR href: https://saiemgilani.github.io/hoopR/ - text: wehoop href: https://saiemgilani.github.io/wehoop/ - text: cfbrecruitR href: https://saiemgilani.github.io/cfbrecruitR/ + - text: puntr + href: https://puntalytics.github.io/puntr + - text: gamezoneR + href: https://jacklich10.github.io/gamezoneR/ + # - text: baseballr + # href: https://saiemgilani.github.io/baseballr/ github: icon: "fab fa-github fa-lg" href: https://github.com/saiemgilani/cfbfastR/ diff --git a/man/cfbd_game_info.Rd b/man/cfbd_game_info.Rd index 373116f9..9770bad2 100644 --- a/man/cfbd_game_info.Rd +++ b/man/cfbd_game_info.Rd @@ -70,6 +70,7 @@ I have defaulted the parameter to false so that you will not have to go to the t \item{\code{away_points}: integer.}{Away team points.} \item{\code{away_post_win_prob}: character.}{Away team post-game win probability.} \item{\code{excitement_index}: character.}{Game excitement index.} +\item{\code{highlights}: character.}{Link to game 
highlights.} } } \description{ diff --git a/man/update_cfb_db.Rd b/man/update_cfb_db.Rd index bad8711b..f26d2827 100644 --- a/man/update_cfb_db.Rd +++ b/man/update_cfb_db.Rd @@ -1,63 +1,63 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/cfb_pbp.R -\name{update_cfb_db} -\alias{update_cfb_db} -\alias{cfb_db} -\alias{cfb} -\alias{database} -\alias{cfb_pbp_db} -\title{\strong{Update or create a cfbfastR play-by-play database}} -\usage{ -update_cfb_db( - dbdir = ".", - dbname = "cfb_pbp_db", - tblname = "cfbfastR_pbp", - force_rebuild = FALSE, - db_connection = NULL -) -} -\arguments{ -\item{dbdir}{Directory in which the database is or shall be located} - -\item{dbname}{File name of an existing or desired SQLite database within \code{dbdir}} - -\item{tblname}{The name of the play by play data table within the database} - -\item{force_rebuild}{Hybrid parameter (logical or numeric) to rebuild parts -of or the complete play by play data table within the database (please see details for further information)} - -\item{db_connection}{A \code{DBIConnection} object, as returned by -\code{\link[DBI:dbConnect]{DBI::dbConnect()}} (please see details for further information)} -} -\description{ -\code{update_cfb_db()} updates or creates a database with \code{cfbfastR} -play by play data of all completed games since 2014. -} -\details{ -This function creates and updates a data table with the name \code{tblname} -within a SQLite database (other drivers via \code{db_connection}) located in -\code{dbdir} and named \code{dbname}. -The data table combines all play by play data for every available game back -to the 2014 season and adds the most recent completed games as soon as they -are available for \code{cfbfastR}. - -The argument \code{force_rebuild} is of hybrid type. It can rebuild the play -by play data table either for the whole cfbfastR era (with \code{force_rebuild = TRUE}) -or just for specified seasons (e.g. 
\code{force_rebuild = c(2019, 2020)}). -Please note the following behavior: -\itemize{ -\item{\code{force_rebuild = TRUE}}{: The data table with the name \code{tblname} -will be removed completely and rebuilt from scratch. This is helpful when -new columns are added during the Off-Season.} -\item{\code{force_rebuild = c(2019, 2020)}}{: The data table with the name \code{tblname} -will be preserved and only rows from the 2019 and 2020 seasons will be -deleted and re-added. This is intended to be used for ongoing seasons because -ESPN's data provider can make changes to the underlying data during the week.} -} - -The parameter \code{db_connection} is intended for advanced users who want -to use other DBI drivers, such as MariaDB, Postgres or odbc. Please note that -the arguments \code{dbdir} and \code{dbname} are dropped in case a \code{db_connection} -is provided but the argument \code{tblname} will still be used to write the -data table into the database. -} +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/cfb_pbp.R +\name{update_cfb_db} +\alias{update_cfb_db} +\alias{cfb_db} +\alias{cfb} +\alias{database} +\alias{cfb_pbp_db} +\title{\strong{Update or create a cfbfastR play-by-play database}} +\usage{ +update_cfb_db( + dbdir = ".", + dbname = "cfb_pbp_db", + tblname = "cfbfastR_pbp", + force_rebuild = FALSE, + db_connection = NULL +) +} +\arguments{ +\item{dbdir}{Directory in which the database is or shall be located} + +\item{dbname}{File name of an existing or desired SQLite database within \code{dbdir}} + +\item{tblname}{The name of the play by play data table within the database} + +\item{force_rebuild}{Hybrid parameter (logical or numeric) to rebuild parts +of or the complete play by play data table within the database (please see details for further information)} + +\item{db_connection}{A \code{DBIConnection} object, as returned by +\code{\link[DBI:dbConnect]{DBI::dbConnect()}} (please see details for further information)} +} 
+\description{ +\code{update_cfb_db()} updates or creates a database with \code{cfbfastR} +play by play data of all completed and available games since 2003. +} +\details{ +This function creates and updates a data table with the name \code{tblname} +within a SQLite database (other drivers via \code{db_connection}) located in +\code{dbdir} and named \code{dbname}. +The data table combines all play by play data for every available game back +to the 2003 season and adds the most recent completed games as soon as they +are available for \code{cfbfastR}. + +The argument \code{force_rebuild} is of hybrid type. It can rebuild the play +by play data table either for the whole cfbfastR era (with \code{force_rebuild = TRUE}) +or just for specified seasons (e.g. \code{force_rebuild = c(2019, 2020)}). +Please note the following behavior: +\itemize{ +\item{\code{force_rebuild = TRUE}}{: The data table with the name \code{tblname} +will be removed completely and rebuilt from scratch. This is helpful when +new columns are added during the Off-Season.} +\item{\code{force_rebuild = c(2019, 2020)}}{: The data table with the name \code{tblname} +will be preserved and only rows from the 2019 and 2020 seasons will be +deleted and re-added. This is intended to be used for ongoing seasons because +ESPN's data provider can make changes to the underlying data during the week.} +} + +The parameter \code{db_connection} is intended for advanced users who want +to use other DBI drivers, such as MariaDB, Postgres or odbc. Please note that +the arguments \code{dbdir} and \code{dbname} are dropped in case a \code{db_connection} +is provided but the argument \code{tblname} will still be used to write the +data table into the database. 
+} diff --git a/vignettes/intro.Rmd b/vignettes/intro.Rmd index 3c03776c..cd1d9b49 100644 --- a/vignettes/intro.Rmd +++ b/vignettes/intro.Rmd @@ -180,14 +180,14 @@ year_split19 = lapply(year_split, function(x) { pbp_2019 = bind_rows(year_split19) ``` -## **The fastR way**: `load_cfb_pbp()` (19 seasons, \~1-1.5 minutes `r emo::ji("flame")`) +## **The fastR way**: `load_cfb_pbp()` (18 seasons, \~2-3 minutes `r emo::ji("flame")`) -We are going to load in data for seasons 2002-2020, it'll take between 120-180 seconds to run. +We are going to load in data for seasons 2003-2020, it'll take between 120-180 seconds to run. -```{r load_2002_2020, warning = FALSE} +```{r load_2003_2020, warning = FALSE} tictoc::tic() pbp <- data.frame() -seasons <- 2002:2020 +seasons <- 2003:2020 progressr::with_progress({ future::plan("multisession") pbp <- cfbfastR::load_cfb_pbp(seasons) From ecb913e5ba210238211ec2f133485d94151d8037 Mon Sep 17 00:00:00 2001 From: Saiem Gilani Date: Tue, 29 Jun 2021 12:15:23 -0400 Subject: [PATCH 5/5] update news --- NEWS.md | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/NEWS.md b/NEWS.md index 0257ec6a..91be93f4 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,5 +1,9 @@ # **cfbfastR v1.9.9** -### Expected points data back to 2003 added to the package function [```load_cfb_pbp()```](https://saiemgilani.github.io/cfbfastR/reference/load_cfb_pbp.html) and [```update_cfb_db()```](https://saiemgilani.github.io/cfbfastR/reference/update_cfb_db.html) + +### Expected points and Win Probability data back to 2003 + +- Added to the package functions [```load_cfb_pbp()```](https://saiemgilani.github.io/cfbfastR/reference/load_cfb_pbp.html) and [```update_cfb_db()```](https://saiemgilani.github.io/cfbfastR/reference/update_cfb_db.html) + - There are a few known errors with the calculations that need to be looked into further. 
One specific one identified by Brendan Farrell noting that short yardage touchdowns for pre-2008 data were often negative EPA plays. This is either a duplication issue or a coding error. # **cfbfastR v1.3.3**