Skip to content

Support Play By Play for Charlotte Hornets (CHH) #310

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 2 commits into
base: deprecated-team-feature-branch
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
44 changes: 44 additions & 0 deletions basketball_reference_web_scraper/data.py
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,50 @@ class Team(Enum):
VANCOUVER_GRIZZLIES = "VANCOUVER GRIZZLIES"
WASHINGTON_BULLETS = "WASHINGTON BULLETS"

class TeamAbbreviation(Enum):
    """Three-letter team codes; every member's value is identical to its name."""

    # Codes that are not listed under the deprecated heading below
    ATL = "ATL"
    BOS = "BOS"
    BRK = "BRK"
    CHI = "CHI"
    CHO = "CHO"
    CLE = "CLE"
    DAL = "DAL"
    DEN = "DEN"
    DET = "DET"
    GSW = "GSW"
    HOU = "HOU"
    IND = "IND"
    LAC = "LAC"
    LAL = "LAL"
    MEM = "MEM"
    MIA = "MIA"
    MIL = "MIL"
    MIN = "MIN"
    NOP = "NOP"
    NYK = "NYK"
    OKC = "OKC"
    ORL = "ORL"
    PHI = "PHI"
    PHO = "PHO"
    POR = "POR"
    SAC = "SAC"
    SAS = "SAS"
    TOR = "TOR"
    UTA = "UTA"
    WAS = "WAS"

    # DEPRECATED TEAMS
    KCK = "KCK"
    NJN = "NJN"
    NOH = "NOH"
    NOK = "NOK"
    CHA = "CHA"
    CHH = "CHH"
    SEA = "SEA"
    STL = "STL"
    VAN = "VAN"
    WSB = "WSB"


class OutputType(Enum):
JSON = "JSON"
Expand Down
30 changes: 20 additions & 10 deletions basketball_reference_web_scraper/http_service.py
Original file line number Diff line number Diff line change
@@ -1,11 +1,16 @@
import datetime

import requests
from lxml import html

from basketball_reference_web_scraper.data import TEAM_TO_TEAM_ABBREVIATION, TeamTotal, PlayerData
from basketball_reference_web_scraper.data import TeamTotal, PlayerData
from basketball_reference_web_scraper.errors import InvalidDate, InvalidPlayerAndSeason
from basketball_reference_web_scraper.html import DailyLeadersPage, PlayerSeasonBoxScoresPage, PlayerSeasonTotalTable, \
PlayerAdvancedSeasonTotalsTable, PlayByPlayPage, SchedulePage, BoxScoresPage, DailyBoxScoresPage, SearchPage, \
PlayerPage, StandingsPage
from basketball_reference_web_scraper.models.calculators import calculate_team_abbreviation
from basketball_reference_web_scraper.serialization.urls.models import PlayByPlayURLData
from basketball_reference_web_scraper.serialization.urls.serializers import DEFAULT_PLAY_BY_PLAY_URL_SERIALIZER


class HTTPService:
Expand All @@ -26,7 +31,7 @@ def standings(self, season_end_year):

page = StandingsPage(html=html.fromstring(response.content))
return self.parser.parse_division_standings(standings=page.division_standings.eastern_conference_table.rows) + \
self.parser.parse_division_standings(standings=page.division_standings.western_conference_table.rows)
self.parser.parse_division_standings(standings=page.division_standings.western_conference_table.rows)

def player_box_scores(self, day, month, year):
url = '{BASE_URL}/friv/dailyleaders.cgi?month={month}&day={day}&year={year}'.format(
Expand Down Expand Up @@ -65,7 +70,8 @@ def regular_season_player_box_scores(self, player_identifier, season_end_year, i
if page.regular_season_box_scores_table is None:
raise InvalidPlayerAndSeason(player_identifier=player_identifier, season_end_year=season_end_year)

return self.parser.parse_player_season_box_scores(box_scores=page.regular_season_box_scores_table.rows, include_inactive_games=include_inactive_games)
return self.parser.parse_player_season_box_scores(box_scores=page.regular_season_box_scores_table.rows,
include_inactive_games=include_inactive_games)

def playoff_player_box_scores(self, player_identifier, season_end_year, include_inactive_games=False):
# Makes assumption that basketball reference pattern of breaking out player pathing using first character of
Expand All @@ -86,15 +92,19 @@ def playoff_player_box_scores(self, player_identifier, season_end_year, include_
if page.playoff_box_scores_table is None:
raise InvalidPlayerAndSeason(player_identifier=player_identifier, season_end_year=season_end_year)

return self.parser.parse_player_season_box_scores(box_scores=page.playoff_box_scores_table.rows, include_inactive_games=include_inactive_games)
return self.parser.parse_player_season_box_scores(box_scores=page.playoff_box_scores_table.rows,
include_inactive_games=include_inactive_games)

def play_by_play(self, home_team, day, month, year):
add_0_if_needed = lambda s: "0" + s if len(s) == 1 else s

# the hard-coded `0` in the url assumes we always take the first match of the given date and team.
url = "{BASE_URL}/boxscores/pbp/{year}{month}{day}0{team_abbr}.html".format(
BASE_URL=HTTPService.BASE_URL, year=year, month=add_0_if_needed(str(month)), day=add_0_if_needed(str(day)),
team_abbr=TEAM_TO_TEAM_ABBREVIATION[home_team]
try:
date = datetime.date(year=year, month=month, day=day)
except ValueError:
raise InvalidDate(day=day, month=month, year=year)
url = DEFAULT_PLAY_BY_PLAY_URL_SERIALIZER.serialize(
value=PlayByPlayURLData(
date=date,
team_abbreviation=calculate_team_abbreviation(team=home_team, date=date)
)
)
response = requests.get(url=url)
response.raise_for_status()
Expand Down
Empty file.
15 changes: 15 additions & 0 deletions basketball_reference_web_scraper/models/calculators.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
import datetime

from basketball_reference_web_scraper.data import TeamAbbreviation, Team
from basketball_reference_web_scraper.models.teams import ABBREVIATIONS_BY_TEAM

# Last game played by the Charlotte Hornets franchise that Basketball Reference abbreviates as CHH:
# May 15th, 2002. Schedule: https://www.basketball-reference.com/teams/CHH/2002_games.html
DATE_OF_CHARLOTTE_HORNETS_LAST_GAME = datetime.date(2002, 5, 15)


def calculate_team_abbreviation(team: Team, date: datetime.date) -> TeamAbbreviation:
    """Return the abbreviation to use for ``team`` on ``date``.

    Team.CHARLOTTE_HORNETS resolves to TeamAbbreviation.CHH for any date up to and
    including DATE_OF_CHARLOTTE_HORNETS_LAST_GAME. Every other combination is looked
    up in ABBREVIATIONS_BY_TEAM.

    :param team: the team whose abbreviation is wanted
    :param date: the date the abbreviation must be valid for
    :return: the matching abbreviation; because the lookup uses ``dict.get``, the
        result is ``None`` when ``team`` has no entry in ABBREVIATIONS_BY_TEAM
    """
    # The team check comes first so the date is only compared for Charlotte Hornets.
    uses_legacy_hornets_code = (
        team is Team.CHARLOTTE_HORNETS
        and date <= DATE_OF_CHARLOTTE_HORNETS_LAST_GAME
    )
    return TeamAbbreviation.CHH if uses_legacy_hornets_code else ABBREVIATIONS_BY_TEAM.get(team)
49 changes: 49 additions & 0 deletions basketball_reference_web_scraper/models/teams.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,49 @@
from typing import Dict

from basketball_reference_web_scraper.data import Team, TeamAbbreviation

# Maps every abbreviation to the team it identifies.
# Team.CHARLOTTE_HORNETS is reachable through two abbreviations: CHO and the deprecated CHH.
#
# The annotation uses typing.Dict rather than the builtin ``dict[...]``: module-level annotations
# are evaluated at import time, and subscripting the builtin ``dict`` raises TypeError before
# Python 3.9.
TEAMS_BY_ABBREVIATION: Dict[TeamAbbreviation, Team] = {
    TeamAbbreviation.ATL: Team.ATLANTA_HAWKS,
    TeamAbbreviation.BOS: Team.BOSTON_CELTICS,
    TeamAbbreviation.BRK: Team.BROOKLYN_NETS,
    TeamAbbreviation.CHI: Team.CHICAGO_BULLS,
    TeamAbbreviation.CHO: Team.CHARLOTTE_HORNETS,
    TeamAbbreviation.CLE: Team.CLEVELAND_CAVALIERS,
    TeamAbbreviation.DAL: Team.DALLAS_MAVERICKS,
    TeamAbbreviation.DEN: Team.DENVER_NUGGETS,
    TeamAbbreviation.DET: Team.DETROIT_PISTONS,
    TeamAbbreviation.GSW: Team.GOLDEN_STATE_WARRIORS,
    TeamAbbreviation.HOU: Team.HOUSTON_ROCKETS,
    TeamAbbreviation.IND: Team.INDIANA_PACERS,
    TeamAbbreviation.LAC: Team.LOS_ANGELES_CLIPPERS,
    TeamAbbreviation.LAL: Team.LOS_ANGELES_LAKERS,
    TeamAbbreviation.MEM: Team.MEMPHIS_GRIZZLIES,
    TeamAbbreviation.MIA: Team.MIAMI_HEAT,
    TeamAbbreviation.MIL: Team.MILWAUKEE_BUCKS,
    TeamAbbreviation.MIN: Team.MINNESOTA_TIMBERWOLVES,
    TeamAbbreviation.NOP: Team.NEW_ORLEANS_PELICANS,
    TeamAbbreviation.NYK: Team.NEW_YORK_KNICKS,
    TeamAbbreviation.OKC: Team.OKLAHOMA_CITY_THUNDER,
    TeamAbbreviation.ORL: Team.ORLANDO_MAGIC,
    TeamAbbreviation.PHI: Team.PHILADELPHIA_76ERS,
    TeamAbbreviation.PHO: Team.PHOENIX_SUNS,
    TeamAbbreviation.POR: Team.PORTLAND_TRAIL_BLAZERS,
    TeamAbbreviation.SAC: Team.SACRAMENTO_KINGS,
    TeamAbbreviation.SAS: Team.SAN_ANTONIO_SPURS,
    TeamAbbreviation.TOR: Team.TORONTO_RAPTORS,
    TeamAbbreviation.UTA: Team.UTAH_JAZZ,
    TeamAbbreviation.WAS: Team.WASHINGTON_WIZARDS,

    # DEPRECATED TEAMS
    TeamAbbreviation.KCK: Team.KANSAS_CITY_KINGS,
    TeamAbbreviation.NJN: Team.NEW_JERSEY_NETS,
    TeamAbbreviation.NOH: Team.NEW_ORLEANS_HORNETS,
    TeamAbbreviation.NOK: Team.NEW_ORLEANS_OKLAHOMA_CITY_HORNETS,
    TeamAbbreviation.CHA: Team.CHARLOTTE_BOBCATS,
    TeamAbbreviation.CHH: Team.CHARLOTTE_HORNETS,
    TeamAbbreviation.SEA: Team.SEATTLE_SUPERSONICS,
    TeamAbbreviation.STL: Team.ST_LOUIS_HAWKS,
    TeamAbbreviation.VAN: Team.VANCOUVER_GRIZZLIES,
    TeamAbbreviation.WSB: Team.WASHINGTON_BULLETS,
}

# Inverse of TEAMS_BY_ABBREVIATION: the abbreviation to use for each team.
#
# The annotation uses typing.Dict rather than the builtin ``dict[...]``: module-level annotations
# are evaluated at import time, and subscripting the builtin ``dict`` raises TypeError before
# Python 3.9.
ABBREVIATIONS_BY_TEAM: Dict[Team, TeamAbbreviation] = {
    team: abbreviation for abbreviation, team in TEAMS_BY_ABBREVIATION.items()
}
# Team.CHARLOTTE_HORNETS is the value for both CHO and CHH in TEAMS_BY_ABBREVIATION, so the
# inversion keeps whichever of the two is inserted last (CHH). Pin the entry to CHO explicitly.
ABBREVIATIONS_BY_TEAM[Team.CHARLOTTE_HORNETS] = TeamAbbreviation.CHO
Empty file.
Empty file.
10 changes: 10 additions & 0 deletions basketball_reference_web_scraper/serialization/urls/models.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
import dataclasses
import datetime

from basketball_reference_web_scraper.data import TeamAbbreviation


@dataclasses.dataclass(frozen=True)
class PlayByPlayURLData:
    """Immutable (frozen) pair of values from which a play-by-play URL is built."""

    # Date that is serialized into the URL
    date: datetime.date

    # Team abbreviation that is serialized into the URL
    team_abbreviation: TeamAbbreviation
43 changes: 43 additions & 0 deletions basketball_reference_web_scraper/serialization/urls/serializers.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
from datetime import date
from urllib.parse import urljoin, SplitResult

from basketball_reference_web_scraper.data import TeamAbbreviation
from basketball_reference_web_scraper.serialization.urls.models import PlayByPlayURLData


# TODO: @jaebradley once Python 3.7 support is deprecated, add a Serializer Protocol
class DateSerializer:
    """Turns a date into its compact, zero-padded ``YYYYMMDD`` form."""

    def serialize(self, value: date) -> str:
        """Return ``value`` formatted as year, month and day with no separators."""
        compact_date_format = "%Y%m%d"
        return value.strftime(compact_date_format)


class TeamAbbreviationSerializer:
    """Turns a TeamAbbreviation into the text placed in URLs."""

    def serialize(self, value: TeamAbbreviation) -> str:
        """Return the enum member's name (for example ``"CHH"``)."""
        member_name = value.name
        return member_name


class PlayByPlayURLSerializer:
    """Builds the Basketball Reference play-by-play page URL for a PlayByPlayURLData value.

    The date and team-abbreviation serializers are injected so that each part of the
    page name can be produced (or replaced in tests) independently.
    """

    def __init__(self, date_serializer, team_abbreviation_serializer):
        """Store the serializers used for the date and team parts of the page name.

        :param date_serializer: object whose ``serialize`` turns the date into text
        :param team_abbreviation_serializer: object whose ``serialize`` turns the
            team abbreviation into text
        """
        self._date_serializer = date_serializer
        self._team_abbreviation_serializer = team_abbreviation_serializer

    def serialize(self, value: PlayByPlayURLData) -> str:
        """Return the absolute ``https`` URL of the play-by-play page for ``value``."""
        serialized_date = self._date_serializer.serialize(value.date)
        serialized_team = self._team_abbreviation_serializer.serialize(value.team_abbreviation)

        # Page names are "<date>0<team>.html". NOTE(review): the literal "0" presumably
        # selects the first game for that date and team; confirm against the site.
        page_name = f"{serialized_date}0{serialized_team}.html"
        path_below_boxscores = urljoin("pbp/", page_name)
        absolute_path = urljoin("/boxscores/", path_below_boxscores)

        # From https://docs.python.org/3.9/library/urllib.parse.html#urllib.parse.urlsplit
        url_parts = SplitResult(
            scheme="https",
            netloc="www.basketball-reference.com",
            path=absolute_path,
            query="",
            fragment="",
        )
        return url_parts.geturl()


# TODO: @jaebradley create a proper singleton metaclass
# Shared module-level serializer wired with the DateSerializer and TeamAbbreviationSerializer above.
DEFAULT_PLAY_BY_PLAY_URL_SERIALIZER = PlayByPlayURLSerializer(
    date_serializer=DateSerializer(),
    team_abbreviation_serializer=TeamAbbreviationSerializer(),
)
20 changes: 17 additions & 3 deletions tests/integration/client/test_play_by_play.py
Original file line number Diff line number Diff line change
Expand Up @@ -169,11 +169,25 @@ def test_non_unicode_matches(self, m):
self.assertEqual(len(plays), 509)


class TestErrorCases(TestCase):
class Test20010109CHH(TestCase):
def setUp(self):
    """Read the saved 200101090CHH play-by-play page into ``self._html``."""
    fixture_path = os.path.join(
        os.path.dirname(__file__),
        "../files/play_by_play/200101090CHH.html",
    )
    with open(fixture_path, 'r') as fixture:
        self._html = fixture.read()

@requests_mock.Mocker()
def test_get_play_by_play_for_day_that_does_not_exist(self, m):
m.get('https://www.basketball-reference.com/boxscores/pbp/201801-10MIL.html', text="Not found", status_code=404)
def test_charlotte_hornets(self, m):
m.get('https://www.basketball-reference.com/boxscores/pbp/200101090CHH.html', text=self._html, status_code=200)
plays = play_by_play(home_team=Team.CHARLOTTE_HORNETS, day=9, month=1, year=2001)
self.assertIsNotNone(plays)
self.assertEqual(len(plays), 576)


class TestErrorCases(TestCase):

@requests_mock.Mocker(real_http=False)
def test_get_play_by_play_for_day_that_does_not_exist(self, _):
self.assertRaisesRegex(
InvalidDate,
"Date with year set to 2018, month set to 1, and day set to -1 is invalid",
Expand Down
3,969 changes: 3,969 additions & 0 deletions tests/integration/files/play_by_play/200101090CHH.html

Large diffs are not rendered by default.

Empty file added tests/unit/models/__init__.py
Empty file.
18 changes: 18 additions & 0 deletions tests/unit/models/test_calculators.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
import datetime
import unittest

from basketball_reference_web_scraper.data import TeamAbbreviation, Team
from basketball_reference_web_scraper.models.calculators import calculate_team_abbreviation


class TestCalculatingTeamAbbreviations(unittest.TestCase):
    """Checks the Charlotte Hornets abbreviation on either side of May 15th, 2002."""

    def test_charlotte_hornets_on_may_15_2002(self):
        # May 15th, 2002 itself is still expected to produce CHH.
        boundary_day = datetime.date(year=2002, month=5, day=15)
        abbreviation = calculate_team_abbreviation(team=Team.CHARLOTTE_HORNETS, date=boundary_day)
        self.assertEqual(abbreviation, TeamAbbreviation.CHH)

    def test_charlotte_hornets_on_may_16_2002(self):
        # One day later the expected abbreviation is CHO.
        day_after_boundary = datetime.date(year=2002, month=5, day=16)
        abbreviation = calculate_team_abbreviation(team=Team.CHARLOTTE_HORNETS, date=day_after_boundary)
        self.assertEqual(abbreviation, TeamAbbreviation.CHO)
Empty file.
Empty file.
41 changes: 41 additions & 0 deletions tests/unit/serialization/urls/test_serializers.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
import unittest
from datetime import date
from unittest import mock

from basketball_reference_web_scraper.data import TeamAbbreviation
from basketball_reference_web_scraper.serialization.urls.models import PlayByPlayURLData
from basketball_reference_web_scraper.serialization.urls.serializers import PlayByPlayURLSerializer, \
DEFAULT_PLAY_BY_PLAY_URL_SERIALIZER


class TestPlayByPlayURLSerializer(unittest.TestCase):
    """Checks how PlayByPlayURLSerializer assembles a URL from its collaborators' output."""

    def setUp(self):
        # Both collaborators are mocks returning fixed text, so the assertion below
        # depends only on how the serializer combines the two pieces.
        mock_date_serializer = mock.Mock(
            name="mock date serializer",
            serialize=mock.Mock(return_value="some serialized date"),
        )
        mock_team_abbreviation_serializer = mock.Mock(
            name="mock team abbreviation serializer",
            serialize=mock.Mock(return_value="some serialized team abbreviation"),
        )
        self.serializer = PlayByPlayURLSerializer(
            date_serializer=mock_date_serializer,
            team_abbreviation_serializer=mock_team_abbreviation_serializer,
        )

        super().setUp()

    def test_output(self):
        url_data = PlayByPlayURLData(
            date=date(year=2001, month=1, day=1),
            team_abbreviation=TeamAbbreviation.BOS,
        )
        self.assertEqual(
            "https://www.basketball-reference.com/boxscores/pbp/some serialized date0some serialized team abbreviation.html",
            self.serializer.serialize(value=url_data),
        )


class TestDefaultPlayByPlayURLSerializer(unittest.TestCase):
    """Checks the URL produced by the module-level default serializer."""

    def test_output(self):
        # January 9th, 2001 with CHH is expected to give the page name 200101090CHH.html.
        url_data = PlayByPlayURLData(
            date=date(year=2001, month=1, day=9),
            team_abbreviation=TeamAbbreviation.CHH,
        )
        self.assertEqual(
            "https://www.basketball-reference.com/boxscores/pbp/200101090CHH.html",
            DEFAULT_PLAY_BY_PLAY_URL_SERIALIZER.serialize(value=url_data),
        )
Loading