wsba-hockey 1.1.5__py3-none-any.whl → 1.1.7__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
wsba_hockey/__init__.py CHANGED
@@ -1 +1 @@
1
- from wsba_hockey.wsba_main import nhl_scrape_game,nhl_scrape_schedule,nhl_scrape_season,nhl_scrape_seasons_info,nhl_scrape_standings,nhl_scrape_roster,nhl_scrape_draft_rankings,nhl_scrape_prospects,nhl_calculate_stats,nhl_shooting_impacts,nhl_apply_xG,nhl_plot_skaters_shots,nhl_plot_games,repo_load_rosters,repo_load_schedule,repo_load_teaminfo,repo_load_pbp,repo_load_seasons
1
+ from wsba_hockey.wsba_main import nhl_scrape_game,nhl_scrape_schedule,nhl_scrape_season,nhl_scrape_seasons_info,nhl_scrape_standings,nhl_scrape_roster,nhl_scrape_draft_rankings,nhl_scrape_prospects,nhl_calculate_stats,nhl_apply_xG,nhl_plot_skaters_shots,nhl_plot_games,repo_load_rosters,repo_load_schedule,repo_load_teaminfo,repo_load_pbp,repo_load_seasons
@@ -90,14 +90,7 @@ def schedule_info(season: int):
90
90
 
91
91
  @app.get("/nhl/games/{game_id}")
92
92
  def pbp(game_id: int):
93
- info = rs.get(f'https://api-web.nhle.com/v1/gamecenter/{game_id}/play-by-play').json()
94
-
95
- season = info['season']
96
- dataset = ds.dataset(f's3://weakside-breakout/pbp/parquet/nhl_pbp_{season}.parquet', format='parquet')
97
- filter_expr = (ds.field('game_id')==game_id)
98
-
99
- table = dataset.to_table(use_threads=True,filter=filter_expr)
100
- df = table.to_pandas()
93
+ df = pd.read_csv(f'data/sources/20242025/{game_id}.csv')
101
94
 
102
95
  df = df.fillna('')
103
96
 
wsba_hockey/tools/agg.py CHANGED
@@ -89,6 +89,7 @@ def calc_indv(pbp,game_strength,second_group):
89
89
 
90
90
  shot = shot.rename(columns={
91
91
  'Gi':f'{type.capitalize()}Gi',
92
+ 'Si':f'{type.capitalize()}Si',
92
93
  'Fi':f'{type.capitalize()}Fi',
93
94
  'xGi':f'{type.capitalize()}xGi',
94
95
  })
@@ -1,6 +1,7 @@
1
1
  import re
2
2
  import warnings
3
3
  import os
4
+ import asyncio
4
5
  import numpy as np
5
6
  import pandas as pd
6
7
  import requests as rs
@@ -179,7 +180,7 @@ def get_game_info(game_id):
179
180
  'coaches':get_game_coaches(game_id),
180
181
  'json_shifts':json_shifts}
181
182
 
182
- def parse_json(info):
183
+ async def parse_json(info):
183
184
  #Given game info, return JSON document
184
185
 
185
186
  #Retreive data
@@ -340,7 +341,7 @@ def clean_html_pbp(info):
340
341
 
341
342
  return cleaned_html
342
343
 
343
- def parse_html(info):
344
+ async def parse_html(info):
344
345
  #Given game info, return HTML event data
345
346
 
346
347
  #Retreive game information and html events
@@ -561,7 +562,7 @@ def espn_game_id(date,away,home):
561
562
  #Return: ESPN game id
562
563
  return game_id
563
564
 
564
- def parse_espn(date,away,home):
565
+ async def parse_espn(date,away,home):
565
566
  #Given a date formatted as YYYY-MM-DD and teams, return game events
566
567
  game_id = espn_game_id(date,away,home)
567
568
  url = f'https://www.espn.com/nhl/playbyplay/_/gameId/{game_id}'
@@ -711,15 +712,24 @@ def assign_target(data):
711
712
  #Revert sort and return dataframe
712
713
  return data.reset_index()
713
714
 
714
- def combine_pbp(info,sources):
715
+ async def combine_pbp(info,sources):
715
716
  #Given game info, return complete play-by-play data for provided game
716
717
 
717
- html_pbp = parse_html(info)
718
+ #Create tasks
719
+ html_task = asyncio.create_task(parse_html(info))
720
+ if info['season'] in [20052006, 20062007, 20072008, 20082009, 20092010]:
721
+ json_task = asyncio.create_task(parse_espn(str(info['game_date']),info['away_team_abbr'],info['home_team_abbr']))
722
+ json_type = 'espn'
723
+ else:
724
+ json_task = asyncio.create_task(parse_json(info))
725
+ json_type = 'nhl'
718
726
 
727
+ html_pbp, json_pbp = await asyncio.gather(html_task, json_task)
728
+
719
729
  #Route data combining - json if season is after 2009-2010:
720
- if str(info['season']) in ['20052006','20062007','20072008','20082009','20092010']:
730
+ if json_type == 'espn':
721
731
  #ESPN x HTML
722
- espn_pbp = parse_espn(str(info['game_date']),info['away_team_abbr'],info['home_team_abbr']).rename(columns={'coords_x':'x',"coords_y":'y'}).sort_values(['period','seconds_elapsed']).reset_index()
732
+ espn_pbp = json_pbp.rename(columns={'coords_x':'x',"coords_y":'y'}).sort_values(['period','seconds_elapsed']).reset_index()
723
733
  merge_col = ['period','seconds_elapsed','event_type','event_team_abbr']
724
734
 
725
735
  #Merge pbp
@@ -727,8 +737,6 @@ def combine_pbp(info,sources):
727
737
 
728
738
  else:
729
739
  #JSON x HTML
730
- json_pbp = parse_json(info)
731
-
732
740
  if sources:
733
741
  dirs_html = f'sources/{info['season']}/HTML/'
734
742
  dirs_json = f'sources/{info['season']}/JSON/'
@@ -1077,12 +1085,10 @@ def combine_shifts(info,sources):
1077
1085
  #Return: full shifts data converted to play-by-play format
1078
1086
  return full_shifts
1079
1087
 
1080
- def combine_data(info,sources):
1088
+ async def combine_data(info,sources):
1081
1089
  #Given game info, return complete play-by-play data
1082
1090
 
1083
- game_id = info['game_id']
1084
-
1085
- pbp = combine_pbp(info,sources)
1091
+ pbp = await combine_pbp(info,sources)
1086
1092
  shifts = combine_shifts(info,sources)
1087
1093
 
1088
1094
  #Combine data
wsba_hockey/wsba_main.py CHANGED
@@ -2,7 +2,9 @@ import random
2
2
  import os
3
3
  import requests as rs
4
4
  import pandas as pd
5
+ import asyncio
5
6
  import time
7
+ from typing import Literal, Union
6
8
  from datetime import datetime, timedelta, date
7
9
  from wsba_hockey.tools.scraping import *
8
10
  from wsba_hockey.tools.xg_model import *
@@ -13,7 +15,7 @@ from wsba_hockey.tools.plotting import *
13
15
  ## Provided below are all integral functions in the WSBA Hockey Python package. ##
14
16
 
15
17
  ## GLOBAL VARIABLES ##
16
- seasons = [
18
+ SEASONS = [
17
19
  '20072008',
18
20
  '20082009',
19
21
  '20092010',
@@ -34,7 +36,7 @@ seasons = [
34
36
  '20242025'
35
37
  ]
36
38
 
37
- convert_seasons = {'2007': '20072008',
39
+ CONVERT_SEASONS = {'2007': '20072008',
38
40
  '2008': '20082009',
39
41
  '2009': '20092010',
40
42
  '2010': '20102011',
@@ -53,17 +55,17 @@ convert_seasons = {'2007': '20072008',
53
55
  '2023': '20232024',
54
56
  '2024': '20242025'}
55
57
 
56
- convert_team_abbr = {'L.A':'LAK',
58
+ CONVERT_TEAM_ABBR = {'L.A':'LAK',
57
59
  'N.J':'NJD',
58
60
  'S.J':'SJS',
59
61
  'T.B':'TBL',
60
62
  'PHX':'ARI'}
61
63
 
62
- per_sixty = ['Fi','xGi','Gi','A1','A2','P1','P','Si','OZF','NZF','DZF','FF','FA','xGF','xGA','GF','GA','SF','SA','CF','CA','HF','HA','Give','Take','Penl','Penl2','Penl5','Draw','Block','GSAx']
64
+ PER_SIXTY = ['Fi','xGi','Gi','A1','A2','P1','P','Si','OZF','NZF','DZF','FF','FA','xGF','xGA','GF','GA','SF','SA','CF','CA','HF','HA','Give','Take','Penl','Penl2','Penl5','Draw','Block','GSAx']
63
65
 
64
66
  #Some games in the API are specifically known to cause errors in scraping.
65
67
  #This list is updated as frequently as necessary
66
- known_probs = {
68
+ KNOWN_PROBS = {
67
69
  '2007020011':'Missing shifts data for game between Chicago and Minnesota.',
68
70
  '2007021178':'Game between the Bruins and Sabres is missing data after the second period, for some reason.',
69
71
  '2008020259':'HTML data is completely missing for this game.',
@@ -79,11 +81,11 @@ known_probs = {
79
81
  '2019020876':'Due to the frightening collapse of Blues defensemen Jay Bouwmeester, a game on February 2nd, 2020 between the Ducks and Blues was postponed. \nWhen the game resumed, Ducks defensemen Hampus Lindholm, who assisted on a goal in the inital game, did not play in the resumed match.'
80
82
  }
81
83
 
82
- shot_types = ['wrist','deflected','tip-in','slap','backhand','snap','wrap-around','poke','bat','cradle','between-legs']
84
+ SHOT_TYPES = ['wrist','deflected','tip-in','slap','backhand','snap','wrap-around','poke','bat','cradle','between-legs']
83
85
 
84
- new = 2024
86
+ NEW = 2024
85
87
 
86
- standings_end = {
88
+ STANDINGS_END = {
87
89
  '20072008':'04-06',
88
90
  '20082009':'04-12',
89
91
  '20092010':'04-11',
@@ -104,37 +106,49 @@ standings_end = {
104
106
  '20242025':'04-17'
105
107
  }
106
108
 
107
- events = ['faceoff','hit','giveaway','takeaway','blocked-shot','missed-shot','shot-on-goal','goal','penalty']
109
+ EVENTS = ['faceoff','hit','giveaway','takeaway','blocked-shot','missed-shot','shot-on-goal','goal','penalty']
108
110
 
109
- dir = os.path.dirname(os.path.realpath(__file__))
110
- schedule_path = os.path.join(dir,'tools\\schedule\\schedule.csv')
111
- info_path = os.path.join(dir,'tools\\teaminfo\\nhl_teaminfo.csv')
112
- default_roster = os.path.join(dir,'tools\\rosters\\nhl_rosters.csv')
111
+ DIR = os.path.dirname(os.path.realpath(__file__))
112
+ SCHEDULE_PATH = os.path.join(DIR,'tools\\schedule\\schedule.csv')
113
+ INFO_PATH = os.path.join(DIR,'tools\\teaminfo\\nhl_teaminfo.csv')
114
+ DEFAULT_ROSTER = os.path.join(DIR,'tools\\rosters\\nhl_rosters.csv')
113
115
 
114
116
  ## SCRAPE FUNCTIONS ##
115
- def nhl_scrape_game(game_ids,split_shifts = False, remove = ['period-start','period-end','challenge','stoppage','shootout-complete','game-end'],verbose = False, sources = False, errors = False):
116
- #Given a set of game_ids (NHL API), return complete play-by-play information as requested
117
- # param 'game_ids' - NHL game ids (or list formatted as ['random', num_of_games, start_year, end_year])
118
- # param 'split_shifts' - boolean which splits pbp and shift events if true
119
- # param 'remove' - list of events to remove from final dataframe
120
- # param 'xg' - xG model to apply to pbp for aggregation
121
- # param 'verbose' - boolean which adds additional event info if true
122
- # param 'sources - boolean scraping the html and json sources to a master directory if true
123
- # param 'errors' - boolean returning game ids which did not scrape if true
124
-
117
+ def nhl_scrape_game(game_ids:list[int], split_shifts:bool = False, remove:list[str] = [], verbose:bool = False, sources:bool = False, errors:bool = False):
118
+ """
119
+ Given a set of game_ids (NHL API), return complete play-by-play information as requested.
120
+
121
+ Args:
122
+ game_ids (List[int] or ['random', int, int, int]):
123
+ List of NHL game IDs to scrape or use ['random', n, start_year, end_year] to fetch n random games.
124
+ split_shifts (bool, optional):
125
+ If True, returns a dict with separate 'pbp' and 'shifts' DataFrames. Default is False.
126
+ remove (List[str], optional):
127
+ List of event types to remove from the result. Default is an empty list.
128
+ verbose (bool, optional):
129
+ If True, generates extra event features (such as those required to calculate xG). Default is False.
130
+ sources (bool, optional):
131
+ If True, saves raw HTML, JSON, SHIFTS, and single-game full play-by-play to a separate folder in the working directory. Default is False.
132
+ errors (bool, optional):
133
+ If True, includes a list of game IDs that failed to scrape in the return. Default is False.
134
+
135
+ Returns:
136
+ pd.DataFrame:
137
+ If split_shifts is False, returns a single DataFrame of play-by-play data.
138
+ dict[str, pd.DataFrame]:
139
+ If split_shifts is True, returns a dictionary with keys:
140
+ - 'pbp': play-by-play events
141
+ - 'shifts': shift change events
142
+ - 'errors' (optional): list of game IDs that failed if errors=True
143
+ """
144
+
125
145
  pbps = []
126
146
  if game_ids[0] == 'random':
127
147
  #Randomize selection of game_ids
128
148
  #Some ids returned may be invalid (for example, 2020021300)
129
149
  num = game_ids[1]
130
- try:
131
- start = game_ids[2]
132
- except:
133
- start = 2007
134
- try:
135
- end = game_ids[3]
136
- except:
137
- end = (date.today().year)-1
150
+ start = game_ids[2] if len(game_ids) > 1 else 2007
151
+ end = game_ids[3] if len(game_ids) > 2 else (date.today().year)-1
138
152
 
139
153
  game_ids = []
140
154
  i = 0
@@ -161,13 +175,13 @@ def nhl_scrape_game(game_ids,split_shifts = False, remove = ['period-start','per
161
175
  error_ids = []
162
176
  prog = 0
163
177
  for game_id in game_ids:
164
- print("Scraping data from game " + str(game_id) + "...",end="")
178
+ print(f'Scraping data from game {game_id}...',end='')
165
179
  start = time.perf_counter()
166
180
 
167
181
  try:
168
182
  #Retrieve data
169
183
  info = get_game_info(game_id)
170
- data = combine_data(info, sources)
184
+ data = asyncio.run(combine_data(info, sources))
171
185
 
172
186
  #Append data to list
173
187
  pbps.append(data)
@@ -186,19 +200,19 @@ def nhl_scrape_game(game_ids,split_shifts = False, remove = ['period-start','per
186
200
  data.to_csv(f'{dirs}{info['game_id']}.csv',index=False)
187
201
 
188
202
  print(f" finished in {secs:.2f} seconds. {prog}/{len(game_ids)} ({(prog/len(game_ids))*100:.2f}%)")
189
- except:
203
+ except Exception as e:
190
204
  #Games such as the all-star game and pre-season games will incur this error
191
205
  #Other games have known problems
192
- if game_id in known_probs.keys():
193
- print(f"\nGame {game_id} has a known problem: {known_probs[game_id]}")
206
+ if game_id in KNOWN_PROBS.keys():
207
+ print(f"\nGame {game_id} has a known problem: {KNOWN_PROBS[game_id]}")
194
208
  else:
195
- print(f"\nUnable to scrape game {game_id}. Ensure the ID is properly inputted and formatted.")
209
+ print(f"\nUnable to scrape game {game_id}. Exception: {e}")
196
210
 
197
211
  #Track error
198
212
  error_ids.append(game_id)
199
213
 
200
214
  #Add all pbps together
201
- if len(pbps) == 0:
215
+ if not pbps:
202
216
  print("\rNo data returned.")
203
217
  return pd.DataFrame()
204
218
  df = pd.concat(pbps)
@@ -210,7 +224,7 @@ def nhl_scrape_game(game_ids,split_shifts = False, remove = ['period-start','per
210
224
  ""
211
225
 
212
226
  #Print final message
213
- if len(error_ids) > 0:
227
+ if error_ids:
214
228
  print(f'\rScrape of provided games finished.\nThe following games failed to scrape: {error_ids}')
215
229
  else:
216
230
  print('\rScrape of provided games finished.')
@@ -218,7 +232,7 @@ def nhl_scrape_game(game_ids,split_shifts = False, remove = ['period-start','per
218
232
  #Split pbp and shift events if necessary
219
233
  #Return: complete play-by-play with data removed or split as necessary
220
234
 
221
- if split_shifts == True:
235
+ if split_shifts:
222
236
  remove.append('change')
223
237
 
224
238
  #Return: dict with pbp and shifts seperated
@@ -242,22 +256,40 @@ def nhl_scrape_game(game_ids,split_shifts = False, remove = ['period-start','per
242
256
  else:
243
257
  return pbp
244
258
 
245
- def nhl_scrape_schedule(season,start = "09-01", end = "08-01"):
246
- #Given a season, return schedule data
247
- # param 'season' - NHL season to scrape
248
- # param 'start' - Start date in season
249
- # param 'end' - End date in season
259
+ def nhl_scrape_schedule(season:int, start:str = '', end:str = ''):
260
+ """
261
+ Given season and an optional date range, retrieve NHL schedule data.
262
+
263
+ Args:
264
+ season (int):
265
+ The NHL season formatted such as "20242025".
266
+ start (str, optional):
267
+ The date string (MM-DD) to start the schedule scrape at. Default is a blank string.
268
+ end (str, optional):
269
+ The date string (MM-DD) to end the schedule scrape at. Default is a blank string.
270
+
271
+ Returns:
272
+ pd.DataFrame:
273
+ A DataFrame containing the schedule data for the specified season and date range.
274
+ """
250
275
 
251
276
  api = "https://api-web.nhle.com/v1/schedule/"
252
277
 
253
- #Determine how to approach scraping; if month in season is after the new year the year must be adjusted
254
- new_year = ["01","02","03","04","05","06"]
255
- if start[:2] in new_year:
256
- start = str(int(season[:4])+1)+"-"+start
257
- end = str(season[:-4])+"-"+end
278
+ #If either start or end are blank then find start and endpoints for specified season
279
+ if start == '' or end == '':
280
+ season_data = rs.get('https://api.nhle.com/stats/rest/en/season').json()['data']
281
+ season_data = [s for s in season_data if s['id'] == season][0]
282
+ start = season_data['startDate'][0:10]
283
+ end = season_data['endDate'][0:10]
258
284
  else:
259
- start = str(season[:4])+"-"+start
260
- end = str(season[:-4])+"-"+end
285
+ #Determine how to approach scraping; if month in season is after the new year the year must be adjusted
286
+ new_year = ["01","02","03","04","05","06"]
287
+ if start[:2] in new_year:
288
+ start = f'{int(str(season)[:4])+1}-{start}'
289
+ end = f'{str(season)[:-4]}-{end}'
290
+ else:
291
+ start = f'{int(str(season)[:4])}-{start}'
292
+ end = f'{str(season)[:-4]}-{end}'
261
293
 
262
294
  form = '%Y-%m-%d'
263
295
 
@@ -274,9 +306,9 @@ def nhl_scrape_schedule(season,start = "09-01", end = "08-01"):
274
306
  for i in range(day):
275
307
  #For each day, call NHL api and retreive info on all games of selected game
276
308
  inc = start+timedelta(days=i)
277
- print("Scraping games on " + str(inc)[:10]+"...")
309
+ print(f'Scraping games on {str(inc)[:10]}...')
278
310
 
279
- get = rs.get(api+str(inc)[:10]).json()
311
+ get = rs.get(f'{api}{str(inc)[:10]}').json()
280
312
  gameWeek = pd.json_normalize(list(pd.json_normalize(get['gameWeek'])['games'])[0])
281
313
 
282
314
  #Return nothing if there's nothing
@@ -302,43 +334,81 @@ def nhl_scrape_schedule(season,start = "09-01", end = "08-01"):
302
334
  #Return: specificed schedule data
303
335
  return df
304
336
 
305
- def nhl_scrape_season(season,split_shifts = False, season_types = [2,3], remove = ['period-start','period-end','game-end','challenge','stoppage'], start = "09-01", end = "08-01", local=False, local_path = schedule_path, verbose = False, sources = False, errors = False):
306
- #Given season, scrape all play-by-play occuring within the season
307
- # param 'season' - NHL season to scrape
308
- # param 'split_shifts' - boolean which splits pbp and shift events if true
309
- # param 'remove' - list of events to remove from final dataframe
310
- # param 'start' - Start date in season
311
- # param 'end' - End date in season
312
- # param 'local' - boolean indicating whether to use local file to scrape game_ids
313
- # param 'local_path' - path of local file
314
- # param 'verbose' - boolean which adds additional event info if true
315
- # param 'sources - boolean scraping the html and json sources to a master directory if true
316
- # param 'errors' - boolean returning game ids which did not scrape if true
317
-
337
+ def nhl_scrape_season(season:int, split_shifts:bool = False, season_types:list[int] = [2,3], remove:list[str] = [], start:str = '', end:str = '', local:bool=False, local_path:str = SCHEDULE_PATH, verbose:bool = False, sources:bool = False, errors:bool = False):
338
+ """
339
+ Given season, scrape all play-by-play occurring within the season.
340
+
341
+ Args:
342
+ season (int):
343
+ The NHL season formatted such as "20242025".
344
+ split_shifts (bool, optional):
345
+ If True, returns a dict with separate 'pbp' and 'shifts' DataFrames. Default is False.
346
+ season_types (List[int], optional):
347
+ List of season_types to include in scraping process. Default is all regular season and playoff games which are 2 and 3 respectively.
348
+ remove (List[str], optional):
349
+ List of event types to remove from the result. Default is an empty list.
350
+ start (str, optional):
351
+ The date string (MM-DD) to start the schedule scrape at. Default is a blank string.
352
+ end (str, optional):
353
+ The date string (MM-DD) to end the schedule scrape at. Default is a blank string.
354
+ local (bool, optional):
355
+ If True, use local file to retrieve schedule data.
356
+ local_path (str, optional):
356
+ Path to the file with schedule data necessary to scrape a season's games (only relevant if local = True).
358
+ verbose (bool, optional):
359
+ If True, generates extra event features (such as those required to calculate xG). Default is False.
360
+ sources (bool, optional):
361
+ If True, saves raw HTML, JSON, SHIFTS, and single-game full play-by-play to a separate folder in the working directory. Default is False.
362
+ errors (bool, optional):
363
+ If True, includes a list of game IDs that failed to scrape in the return. Default is False.
364
+
365
+ Returns:
366
+ pd.DataFrame:
367
+ If split_shifts is False, returns a single DataFrame of play-by-play data.
368
+ dict[str, pd.DataFrame]:
369
+ If split_shifts is True, returns a dictionary with keys:
370
+ - 'pbp': play-by-play events
371
+ - 'shifts': shift change events
372
+ - 'errors' (optional): list of game IDs that failed if errors=True
373
+ """
374
+
318
375
  #Determine whether to use schedule data in repository or to scrape
319
376
  if local:
320
377
  load = pd.read_csv(local_path)
321
378
  load['date'] = pd.to_datetime(load['date'])
322
-
323
- start = f'{(season[0:4] if int(start[0:2])>=9 else season[4:8])}-{int(start[0:2])}-{int(start[3:5])}'
324
- end = f'{(season[0:4] if int(end[0:2])>=9 else season[4:8])}-{int(end[0:2])}-{int(end[3:5])}'
325
-
326
- load = load.loc[(load['season'].astype(str)==season)&
379
+
380
+ if start == '' or end == '':
381
+ season_data = rs.get('https://api.nhle.com/stats/rest/en/season').json()['data']
382
+ season_data = [s for s in season_data if s['id'] == season][0]
383
+ start = season_data['startDate'][0:10]
384
+ end = season_data['endDate'][0:10]
385
+
386
+ form = '%Y-%m-%d'
387
+
388
+ #Create datetime values from dates
389
+ start = datetime.strptime(start,form)
390
+ end = datetime.strptime(end,form)
391
+
392
+ else:
393
+ start = f'{(str(season)[0:4] if int(start[0:2])>=9 else str(season)[4:8])}-{start[0:2]}-{start[3:5]}'
394
+ end = f'{(str(season)[0:4] if int(end[0:2])>=9 else str(season)[4:8])}-{end[0:2]}-{end[3:5]}'
395
+
396
+ load = load.loc[(load['season']==season)&
327
397
  (load['season_type'].isin(season_types))&
328
398
  (load['date']>=start)&(load['date']<=end)]
329
399
 
330
- game_ids = list(load['id'].astype(str))
400
+ game_ids = load['id'].to_list()
331
401
  else:
332
402
  load = nhl_scrape_schedule(season,start,end)
333
- load = load.loc[(load['season'].astype(str)==season)&(load['season_type'].isin(season_types))]
334
- game_ids = list(load['id'].astype(str))
403
+ load = load.loc[(load['season']==season)&(load['season_type'].isin(season_types))]
404
+ game_ids = load['id'].to_list()
335
405
 
336
406
  #If no games found, terminate the process
337
407
  if not game_ids:
338
408
  print('No games found for dates in season...')
339
409
  return ""
340
410
 
341
- print(f"Scraping games from {season[0:4]}-{season[4:8]} season...")
411
+ print(f"Scraping games from {str(season)[0:4]}-{str(season)[4:8]} season...")
342
412
  start = time.perf_counter()
343
413
 
344
414
  #Perform scrape
@@ -354,11 +424,22 @@ def nhl_scrape_season(season,split_shifts = False, season_types = [2,3], remove
354
424
  #Return: Complete pbp and shifts data for specified season as well as dataframe of game_ids which failed to return data
355
425
  return data
356
426
 
357
- def nhl_scrape_seasons_info(seasons = []):
358
- #Returns info related to NHL seasons (by default, all seasons are included)
427
+ def nhl_scrape_seasons_info(seasons:list[int] = []):
428
+ """
429
+ Returns info related to NHL seasons (by default, all seasons are included)
430
+ Args:
431
+ seasons (List[int], optional):
432
+ List of NHL seasons to include, each formatted such as "20242025". Default is an empty list (all seasons).
433
+
434
+ Returns:
435
+ pd.DataFrame:
436
+ A DataFrame containing the information for requested seasons.
437
+ """
438
+
439
+ #
359
440
  # param 'season' - list of seasons to include
360
441
 
361
- print("Scraping info for seasons: " + str(seasons))
442
+ print(f'Scraping info for seasons: {seasons}')
362
443
  api = "https://api.nhle.com/stats/rest/en/season"
363
444
  info = "https://api-web.nhle.com/v1/standings-season"
364
445
  data = rs.get(api).json()['data']
@@ -374,15 +455,23 @@ def nhl_scrape_seasons_info(seasons = []):
374
455
  else:
375
456
  return df.sort_values(by=['id'])
376
457
 
377
- def nhl_scrape_standings(arg = "now", season_type = 2):
378
- #Returns standings
379
- # param 'arg' - by default, this is "now" returning active NHL standings. May also be a specific date formatted as YYYY-MM-DD, a season (scrapes the last standings date for the season) or a year (for playoffs).
380
- # param 'season_type' - by default, this scrapes the regular season standings. If set to 3, it returns the playoff bracket for the specified season
458
+ def nhl_scrape_standings(arg:str = "now", season_type:int = 2):
459
+ """
460
+ Returns standings or playoff bracket
461
+ Args:
462
+ arg (str, optional):
463
+ Date formatted as 'YYYY-MM-DD' to scrape standings for specific date or 'now' for current standings. Default is 'now'.
464
+ season_type (int, optional):
465
+ Part of season to scrape. If 3 (playoffs) then scrape the playoff bracket for the season implied by arg. When arg = 'now' this is ignored. Default is 2.
466
+
467
+ Returns:
468
+ pd.DataFrame:
469
+ A DataFrame containing the standings information (or playoff bracket).
470
+ """
381
471
 
382
- #arg param is ignored when set to "now" if season_type param is 3
383
472
  if season_type == 3:
384
473
  if arg == "now":
385
- arg = new
474
+ arg = NEW
386
475
 
387
476
  print(f"Scraping playoff bracket for date: {arg}")
388
477
  api = f"https://api-web.nhle.com/v1/playoff-bracket/{arg}"
@@ -394,19 +483,29 @@ def nhl_scrape_standings(arg = "now", season_type = 2):
394
483
  else:
395
484
  if arg == "now":
396
485
  print("Scraping standings as of now...")
397
- elif arg in seasons:
486
+ elif arg in SEASONS:
398
487
  print(f'Scraping standings for season: {arg}')
399
488
  else:
400
489
  print(f"Scraping standings for date: {arg}")
401
490
 
402
- api = f"https://api-web.nhle.com/v1/standings/{arg[4:8]}-{standings_end[arg]}"
491
+ api = f"https://api-web.nhle.com/v1/standings/{arg[4:8]}-{STANDINGS_END[arg]}"
403
492
  data = rs.get(api).json()['standings']
404
493
 
405
494
  return pd.json_normalize(data)
406
495
 
407
- def nhl_scrape_roster(season):
408
- #Given a nhl season, return rosters for all participating teams
409
- # param 'season' - NHL season to scrape
496
+ def nhl_scrape_roster(season: int):
497
+ """
498
+ Returns rosters for all teams in a given season.
499
+
500
+ Args:
501
+ season (int):
502
+ The NHL season formatted such as "20242025".
503
+
504
+ Returns:
505
+ pd.DataFrame:
506
+ A DataFrame containing the rosters for all teams in the specified season.
507
+ """
508
+
410
509
  print("Scrpaing rosters for the "+ season + "season...")
411
510
  teaminfo = pd.read_csv(info_path)
412
511
 
@@ -435,8 +534,18 @@ def nhl_scrape_roster(season):
435
534
 
436
535
  return pd.concat(rosts)
437
536
 
438
- def nhl_scrape_prospects(team):
439
- #Given team abbreviation, retreive current team prospects
537
+ def nhl_scrape_prospects(team:str):
538
+ """
539
+ Returns prospects for specified team
540
+
541
+ Args:
542
+ team (str):
543
+ Three character team abbreviation such as 'BOS'
544
+
545
+ Returns:
546
+ pd.DataFrame:
547
+ A DataFrame containing the prospect data for the specified team.
548
+ """
440
549
 
441
550
  api = f'https://api-web.nhle.com/v1/prospects/{team}'
442
551
 
@@ -452,10 +561,20 @@ def nhl_scrape_prospects(team):
452
561
  #Return: team prospects
453
562
  return prospects
454
563
 
455
- def nhl_scrape_team_info(country = False):
456
- #Given option to return franchise or country, return team information
564
+ def nhl_scrape_team_info(country:bool = False):
565
+ """
566
+ Returns team or country information from the NHL API.
567
+
568
+ Args:
569
+ country (bool, optional):
570
+ If True, returns country information instead of NHL team information.
571
+
572
+ Returns:
573
+ pd.DataFrame:
574
+ A DataFrame containing team or country information from the NHL API.
575
+ """
457
576
 
458
- print('Scraping team information...')
577
+ print(f'Scraping {'country' if country else 'team'} information...')
459
578
  api = f'https://api.nhle.com/stats/rest/en/{'country' if country else 'team'}'
460
579
 
461
580
  data = pd.json_normalize(rs.get(api).json()['data'])
@@ -467,8 +586,19 @@ def nhl_scrape_team_info(country = False):
467
586
 
468
587
  return data.sort_values(by=(['country3Code','countryCode','iocCode','countryName'] if country else ['fullName','triCode','id']))
469
588
 
470
- def nhl_scrape_player_data(player_ids):
471
- #Given player id, return player information
589
+ def nhl_scrape_player_data(player_ids:list[int]):
590
+ """
591
+ Returns player data for specified players.
592
+
593
+ Args:
594
+ player_ids (list[int]):
595
+ List of NHL API player IDs to retrieve information for.
596
+
597
+ Returns:
598
+ pd.DataFrame:
599
+ A DataFrame containing player data for specified players.
600
+ """
601
+
472
602
  infos = []
473
603
  for player_id in player_ids:
474
604
  player_id = int(player_id)
@@ -489,15 +619,28 @@ def nhl_scrape_player_data(player_ids):
489
619
  else:
490
620
  return pd.DataFrame()
491
621
 
492
- def nhl_scrape_draft_rankings(arg = 'now', category = ''):
493
- #Given url argument for timeframe and prospect category, return draft rankings
494
- #Category 1 is North American Skaters
495
- #Category 2 is International Skaters
496
- #Category 3 is North American Goalie
497
- #Category 4 is International Goalie
622
+ def nhl_scrape_draft_rankings(arg:str = 'now', category:int = 0):
623
+ """
624
+ Returns draft rankings
625
+ Args:
626
+ arg (str, optional):
627
+ Date formatted as 'YYYY-MM-DD' to scrape draft rankings for specific date or 'now' for current draft rankings. Default is 'now'.
628
+ category (int, optional):
629
+ Category number for prospects. When arg = 'now' this does not apply.
630
+
631
+ - Category 1 is North American Skaters.
632
+ - Category 2 is International Skaters.
633
+ - Category 3 is North American Goalie.
634
+ - Category 4 is International Goalie
635
+
636
+ Default is 0 (all prospects).
637
+ Returns:
638
+ pd.DataFrame:
639
+ A DataFrame containing draft rankings.
640
+ """
498
641
 
499
642
  #Player category only applies when requesting a specific season
500
- api = f"https://api-web.nhle.com/v1/draft/rankings/{arg}/{category}" if category != "" else f"https://api-web.nhle.com/v1/draft/rankings/{arg}"
643
+ api = f"https://api-web.nhle.com/v1/draft/rankings/{arg}/{category}" if category > 0 else f"https://api-web.nhle.com/v1/draft/rankings/{arg}"
501
644
  data = pd.json_normalize(rs.get(api).json()['rankings'])
502
645
 
503
646
  #Add player name columns
@@ -506,10 +649,16 @@ def nhl_scrape_draft_rankings(arg = 'now', category = ''):
506
649
  #Return: prospect rankings
507
650
  return data
508
651
 
509
- def nhl_apply_xG(pbp):
510
- #Given play-by-play data, return this data with xG-related columns
511
-
512
- #param 'pbp' - play-by-play data
652
+ def nhl_apply_xG(pbp: pd.DataFrame):
653
+ """
654
+ Given play-by-play data, return this data with xG-related columns
655
+ Args:
656
+ pbp (pd.DataFrame):
657
+ A DataFrame containing play-by-play data generated within the WSBA Hockey package.
658
+ Returns:
659
+ pd.DataFrame:
660
+ A DataFrame containing input play-by-play data with xG column.
661
+ """
513
662
 
514
663
  print(f'Applying WSBA xG to model with seasons: {pbp['season'].drop_duplicates().to_list()}')
515
664
 
@@ -518,7 +667,7 @@ def nhl_apply_xG(pbp):
518
667
 
519
668
  return pbp
520
669
 
521
- def nhl_shooting_impacts(agg,type):
670
+ def shooting_impacts(agg, type):
522
671
  #Given stats table generated from the nhl_calculate_stats function, return table with shot impacts
523
672
  #Only 5v5 is supported as of now
524
673
 
@@ -687,7 +836,7 @@ def nhl_shooting_impacts(agg,type):
687
836
  pos[f'{group[0]}-FNI-T'] = (pos[f'{group[0]}-FNI']/60)*pos['TOI']
688
837
 
689
838
  #Rank per 60 stats
690
- for stat in per_sixty[11:len(per_sixty)]:
839
+ for stat in PER_SIXTY[11:len(PER_SIXTY)]:
691
840
  pos[f'{stat}/60-P'] = pos[f'{stat}/60'].rank(pct=True)
692
841
 
693
842
  #Flip percentiles for against stats
@@ -788,7 +937,7 @@ def nhl_shooting_impacts(agg,type):
788
937
  pos['RushesFi'] = pos['RushFi/60'].rank(pct=True)
789
938
 
790
939
  #Rank per 60 stats
791
- for stat in per_sixty:
940
+ for stat in PER_SIXTY:
792
941
  pos[f'{stat}/60-P'] = pos[f'{stat}/60'].rank(pct=True)
793
942
 
794
943
  #Flip percentiles for against stats
@@ -868,7 +1017,7 @@ def nhl_shooting_impacts(agg,type):
868
1017
  #Return: skater stats with shooting impacts
869
1018
  return df
870
1019
 
871
- def nhl_calculate_stats(pbp,type,season_types,game_strength,split_game=False,roster_path=default_roster,shot_impact=False):
1020
+ def nhl_calculate_stats(pbp:pd.DataFrame, type:Literal['skater','goalie','team'], season_types:list[int], game_strength: Union[Literal['all'], list[str]], split_game:bool = False, roster_path:str = DEFAULT_ROSTER, shot_impact:bool = False):
872
1021
  #Given play-by-play, seasonal information, game_strength, rosters, and xG model, return aggregated stats
873
1022
  # param 'pbp' - play-by-play dataframe
874
1023
  # param 'type' - type of stats to calculate ('skater', 'goalie', or 'team')
@@ -879,6 +1028,33 @@ def nhl_calculate_stats(pbp,type,season_types,game_strength,split_game=False,ros
879
1028
  # param 'roster_path' - path to roster file
880
1029
  # param 'shot_impact' - boolean determining if the shot impact model will be applied to the dataset
881
1030
 
1031
+ """
1032
+ Given play-by-play data, seasonal information, game strength, rosters, and an xG model,
1033
+ return aggregated statistics at the skater, goalie, or team level.
1034
+
1035
+ Args:
1036
+ pbp (pd.DataFrame):
1037
+ A DataFrame containing play-by-play event data.
1038
+ type (Literal['skater', 'goalie', 'team']):
1039
+ Type of statistics to calculate. Must be one of 'skater', 'goalie', or 'team'.
1040
+ season (int):
1041
+ The NHL season formatted such as "20242025".
1042
+ season_types (List[int], optional):
1043
+ List of season_types to include in scraping process. Default is all regular season and playoff games which are 2 and 3 respectively.
1044
+ game_strength (str or list[str]):
1045
+ List of game strength states to include (e.g., ['5v5','5v4','4v5']).
1046
+ split_game (bool, optional):
1047
+ If True, aggregates stats separately for each game; otherwise, stats are aggregated across all games. Default is False.
1048
+ roster_path (str, optional):
1049
+ File path to the roster data used for mapping players and teams.
1050
+ shot_impact (bool, optional):
1051
+ If True, applies shot impact metrics to the stats DataFrame. Default is False.
1052
+
1053
+ Returns:
1054
+ pd.DataFrame:
1055
+ A DataFrame containing the aggregated statistics according to the selected parameters.
1056
+ """
1057
+
882
1058
  print(f"Calculating statistics for all games in the provided play-by-play data at {game_strength} for {type}s...\nSeasons included: {pbp['season'].drop_duplicates().to_list()}...")
883
1059
  start = time.perf_counter()
884
1060
 
@@ -911,10 +1087,11 @@ def nhl_calculate_stats(pbp,type,season_types,game_strength,split_game=False,ros
911
1087
  complete['TOI'] = complete['TOI']/60
912
1088
 
913
1089
  #Add per 60 stats
914
- for stat in ['FF','FA','xGF','xGA','GF','GA','CF','CA','GSAx']:
1090
+ for stat in ['FF','FA','xGF','xGA','GF','GA','SF','SA','CF','CA','GSAx']:
915
1091
  complete[f'{stat}/60'] = (complete[stat]/complete['TOI'])*60
916
1092
 
917
1093
  complete['GF%'] = complete['GF']/(complete['GF']+complete['GA'])
1094
+ complete['SF%'] = complete['SF']/(complete['SF']+complete['SA'])
918
1095
  complete['xGF%'] = complete['xGF']/(complete['xGF']+complete['xGA'])
919
1096
  complete['FF%'] = complete['FF']/(complete['FF']+complete['FA'])
920
1097
  complete['CF%'] = complete['CF']/(complete['CF']+complete['CA'])
@@ -953,10 +1130,6 @@ def nhl_calculate_stats(pbp,type,season_types,game_strength,split_game=False,ros
953
1130
  #Find player headshot
954
1131
  complete['Headshot'] = 'https://assets.nhle.com/mugs/nhl/'+complete['Season'].astype(str)+'/'+complete['Team']+'/'+complete['ID'].astype(int).astype(str)+'.png'
955
1132
 
956
- end = time.perf_counter()
957
- length = end-start
958
- print(f'...finished in {(length if length <60 else length/60):.2f} {'seconds' if length <60 else 'minutes'}.')
959
-
960
1133
  head = ['Goalie','ID','Game'] if 'Game' in complete.columns else ['Goalie','ID']
961
1134
  complete = complete[head+[
962
1135
  "Season","Team",'WSBA',
@@ -973,7 +1146,7 @@ def nhl_calculate_stats(pbp,type,season_types,game_strength,split_game=False,ros
973
1146
 
974
1147
  #Apply shot impacts if necessary
975
1148
  if shot_impact:
976
- complete = nhl_shooting_impacts(complete,'goalie')
1149
+ complete = shooting_impacts(complete,'goalie')
977
1150
 
978
1151
  end = time.perf_counter()
979
1152
  length = end-start
@@ -991,7 +1164,7 @@ def nhl_calculate_stats(pbp,type,season_types,game_strength,split_game=False,ros
991
1164
  complete['TOI'] = complete['TOI']/60
992
1165
 
993
1166
  #Add per 60 stats
994
- for stat in per_sixty[11:len(per_sixty)]:
1167
+ for stat in PER_SIXTY[11:len(PER_SIXTY)]:
995
1168
  complete[f'{stat}/60'] = (complete[stat]/complete['TOI'])*60
996
1169
 
997
1170
  complete['GF%'] = complete['GF']/(complete['GF']+complete['GA'])
@@ -1014,10 +1187,10 @@ def nhl_calculate_stats(pbp,type,season_types,game_strength,split_game=False,ros
1014
1187
  'Block',
1015
1188
  'RushF','RushA','RushFxG','RushAxG','RushFG','RushAG',
1016
1189
  'GSAx'
1017
- ]+[f'{stat}/60' for stat in per_sixty[11:len(per_sixty)]]]
1190
+ ]+[f'{stat}/60' for stat in PER_SIXTY[11:len(PER_SIXTY)]]]
1018
1191
  #Apply shot impacts if necessary
1019
1192
  if shot_impact:
1020
- complete = nhl_shooting_impacts(complete,'team')
1193
+ complete = shooting_impacts(complete,'team')
1021
1194
 
1022
1195
  end = time.perf_counter()
1023
1196
  length = end-start
@@ -1085,13 +1258,13 @@ def nhl_calculate_stats(pbp,type,season_types,game_strength,split_game=False,ros
1085
1258
  complete['WSBA'] = complete['Player']+complete['Season'].astype(str)+complete['Team']
1086
1259
 
1087
1260
  #Add per 60 stats
1088
- for stat in per_sixty:
1261
+ for stat in PER_SIXTY:
1089
1262
  complete[f'{stat}/60'] = (complete[stat]/complete['TOI'])*60
1090
1263
 
1091
1264
  #Shot Type Metrics
1092
1265
  type_metrics = []
1093
1266
  for type in shot_types:
1094
- for stat in per_sixty[:3]:
1267
+ for stat in PER_SIXTY[:3]:
1095
1268
  type_metrics.append(f'{type.capitalize()}{stat}')
1096
1269
 
1097
1270
  head = ['Player','ID','Game'] if 'Game' in complete.columns else ['Player','ID']
@@ -1101,7 +1274,7 @@ def nhl_calculate_stats(pbp,type,season_types,game_strength,split_game=False,ros
1101
1274
  'Height (in)','Weight (lbs)',
1102
1275
  'Birthday','Age','Nationality',
1103
1276
  'GP','TOI',
1104
- "Gi","A1","A2",'P1','P','Si','Sh%',
1277
+ "Gi","A1","A2",'P1','P','Si','Shi%',
1105
1278
  'Give','Take','PM%','HF','HA','HF%',
1106
1279
  "Fi","xGi",'xGi/Fi',"Gi/xGi","Fshi%",
1107
1280
  "GF","SF","FF","xGF","xGF/FF","GF/xGF","ShF%","FshF%",
@@ -1116,11 +1289,11 @@ def nhl_calculate_stats(pbp,type,season_types,game_strength,split_game=False,ros
1116
1289
  'OZF','NZF','DZF',
1117
1290
  'OZF%','NZF%','DZF%',
1118
1291
  'GSAx'
1119
- ]+[f'{stat}/60' for stat in per_sixty]+type_metrics].fillna(0).sort_values(['Player','Season','Team','ID'])
1292
+ ]+[f'{stat}/60' for stat in PER_SIXTY]+type_metrics].fillna(0).sort_values(['Player','Season','Team','ID'])
1120
1293
 
1121
1294
  #Apply shot impacts if necessary (Note: this will remove skaters with fewer than 150 minutes of TOI due to the shot impact TOI rule)
1122
1295
  if shot_impact:
1123
- complete = nhl_shooting_impacts(complete,'skater')
1296
+ complete = shooting_impacts(complete,'skater')
1124
1297
 
1125
1298
  end = time.perf_counter()
1126
1299
  length = end-start
@@ -1128,16 +1301,34 @@ def nhl_calculate_stats(pbp,type,season_types,game_strength,split_game=False,ros
1128
1301
 
1129
1302
  return complete
1130
1303
 
1131
- def nhl_plot_skaters_shots(pbp,skater_dict,strengths,marker_dict=event_markers,onice = 'indv',title = True,legend=False):
1132
- #Returns dict of plots for specified skaters
1133
- # param 'pbp' - pbp to plot data
1134
- # param 'skater_dict' - skaters to plot shots for (format: {'Patrice Bergeron':['20242025','BOS']})
1135
- # param 'strengths' - strengths to include in plotting
1136
- # param 'marker_dict' - dict with markers to use for events
1137
- # param 'onice' - can set which shots to include in plotting for the specified skater ('indv', 'for', 'against')
1138
- # param 'title' - bool including title when true
1139
- # param 'legend' - bool which includes legend if true
1140
- # param 'xg' - xG model to apply to pbp for plotting
1304
+ def nhl_plot_skaters_shots(pbp:pd.DataFrame, skater_dict:dict, strengths:Union[Literal['all'], list[str]], marker_dict:dict = event_markers, onice:Literal['indv','for','against'] = ['indv'], title:bool = True, legend:bool = False):
1305
+ """
1306
+ Return a dictionary of shot plots for the specified skaters.
1307
+
1308
+ Args:
1309
+ pbp (pd.DataFrame):
1310
+ A DataFrame containing play-by-play event data to be visualized.
1311
+ skater_dict (dict[str, list[str]]):
1312
+ Dictionary of skaters to plot, where each key is a player name and the value is a list
1313
+ with season and team info (e.g., {'Patrice Bergeron': ['20242025', 'BOS']}).
1314
+ strengths (str or list[str]):
1315
+ List of game strength states to include (e.g., ['5v5','5v4','4v5']).
1316
+ marker_dict (dict[str, dict], optional):
1317
+ Dictionary of event types mapped to marker styles used in plotting.
1318
+ onice (Literal['indv', 'for', 'against'], optional):
1319
+ Determines which shot events to include for the player:
1320
+ - 'indv': only the player's own shots,
1321
+ - 'for': shots taken by the player's team while they are on ice,
1322
+ - 'against': shots taken by the opposing team while the player is on ice.
1323
+ title (bool, optional):
1324
+ Whether to include a plot title.
1325
+ legend (bool, optional):
1326
+ Whether to include a legend on the plots.
1327
+
1328
+ Returns:
1329
+ dict[str, matplotlib.figure.Figure]:
1330
+ A dictionary mapping each skater’s name to their corresponding matplotlib shot plot figure.
1331
+ """
1141
1332
 
1142
1333
  print(f'Plotting the following skater shots: {skater_dict}...')
1143
1334
 
@@ -1152,15 +1343,28 @@ def nhl_plot_skaters_shots(pbp,skater_dict,strengths,marker_dict=event_markers,o
1152
1343
  #Return: list of plotted skater shot charts
1153
1344
  return skater_plots
1154
1345
 
1155
- def nhl_plot_games(pbp,events,strengths,game_ids='all',marker_dict=event_markers,team_colors={'away':'primary','home':'primary'},legend=False):
1156
- #Returns dict of plots for specified games
1157
- # param 'pbp' - pbp to plot data
1158
- # param 'events' - type of events to plot
1159
- # param 'strengths' - strengths to include in plotting
1160
- # param 'game_ids' - games to plot (list if not set to 'all')
1161
- # param 'marker_dict' - dict with colors to use for events
1162
- # param 'legend' - bool which includes legend if true
1163
- # param 'xg' - xG model to apply to pbp for plotting
1346
+ def nhl_plot_games(pbp:pd.DataFrame, events:list[str], strengths:Union[Literal['all'], list[str]], game_ids: Union[Literal['all'], list[int]] = 'all', marker_dict:dict = event_markers, team_colors:dict = {'away':'primary','home':'primary'}, legend:bool =False):
1347
+ """
1348
+ Returns a dictionary of event plots for the specified games.
1349
+
1350
+ Args:
1351
+ pbp (pd.DataFrame):
1352
+ A DataFrame containing play-by-play event data.
1353
+ events (list[str]):
1354
+ List of event types to include in the plot (e.g., ['shot-on-goal', 'goal']).
1355
+ strengths (str or list[str]):
1356
+ List of game strength states to include (e.g., ['5v5','5v4','4v5']).
1357
+ game_ids (str or list[int]):
1358
+ List of game IDs to plot. If set to 'all', plots will be generated for all games in the DataFrame.
1359
+ marker_dict (dict[str, dict]):
1360
+ Dictionary mapping event types to marker styles and/or colors used in plotting.
1361
+ legend (bool):
1362
+ Whether to include a legend on the plots.
1363
+
1364
+ Returns:
1365
+ dict[int, matplotlib.figure.Figure]:
1366
+ A dictionary mapping each game ID to its corresponding matplotlib event plot figure.
1367
+ """
1164
1368
 
1165
1369
  #Find games to scrape
1166
1370
  if game_ids == 'all':
@@ -1180,7 +1384,7 @@ def repo_load_rosters(seasons = []):
1180
1384
  #Returns roster data from repository
1181
1385
  # param 'seasons' - list of seasons to include
1182
1386
 
1183
- data = pd.read_csv(default_roster)
1387
+ data = pd.read_csv(DEFAULT_ROSTER)
1184
1388
  if len(seasons)>0:
1185
1389
  data = data.loc[data['season'].isin(seasons)]
1186
1390
 
@@ -1190,7 +1394,7 @@ def repo_load_schedule(seasons = []):
1190
1394
  #Returns schedule data from repository
1191
1395
  # param 'seasons' - list of seasons to include
1192
1396
 
1193
- data = pd.read_csv(schedule_path)
1397
+ data = pd.read_csv(SCHEDULE_PATH)
1194
1398
  if len(seasons)>0:
1195
1399
  data = data.loc[data['season'].isin(seasons)]
1196
1400
 
@@ -1199,7 +1403,7 @@ def repo_load_schedule(seasons = []):
1199
1403
  def repo_load_teaminfo():
1200
1404
  #Returns team data from repository
1201
1405
 
1202
- return pd.read_csv(info_path)
1406
+ return pd.read_csv(INFO_PATH)
1203
1407
 
1204
1408
  def repo_load_pbp(seasons = []):
1205
1409
  #Returns play-by-play data from repository
@@ -1207,11 +1411,11 @@ def repo_load_pbp(seasons = []):
1207
1411
 
1208
1412
  #Add parquet to total
1209
1413
  print(f'Loading play-by-play from the following seasons: {seasons}...')
1210
- dfs = [pd.read_parquet(f"https://weakside-breakout.s3.us-east-2.amazonaws.com/pbp/{season}.parquet") for season in seasons]
1414
+ dfs = [pd.read_parquet(f"https://weakside-breakout.s3.us-east-2.amazonaws.com/pbp/parquet/nhl_pbp_{season}.parquet") for season in seasons]
1211
1415
 
1212
1416
  return pd.concat(dfs)
1213
1417
 
1214
1418
  def repo_load_seasons():
1215
1419
  #List of available seasons to scrape
1216
1420
 
1217
- return seasons
1421
+ return SEASONS
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: wsba_hockey
3
- Version: 1.1.5
3
+ Version: 1.1.7
4
4
  Summary: WeakSide Breakout's complete Python package of access to hockey data, primarily including the scraping of National Hockey League schedule, play-by-play, and shifts information.
5
5
  Author-email: Owen Singh <owenbksingh@gmail.com>
6
6
  Project-URL: Homepage, https://github.com/owensingh38/wsba_hockey/
@@ -1,8 +1,8 @@
1
- wsba_hockey/__init__.py,sha256=yfr8z5PA503iaIQv30ngancwT_WnsuK-tZETKlHcI0M,377
1
+ wsba_hockey/__init__.py,sha256=qye0rq22KeaUzBPH__pqjBA_igwsmHemOAbaY_G2tNY,356
2
2
  wsba_hockey/data_pipelines.py,sha256=SITapG3nbea6-_EsXujMW2JBQxtRaQ33XMcE6ohn2Ko,10853
3
3
  wsba_hockey/workspace.py,sha256=MwuyqyLW0dHNa06WEm60RkvbFoCn8LBXhnki66V-ttY,954
4
- wsba_hockey/wsba_main.py,sha256=RCjq0NwB82P1fXoghFp2HtDyYX5nWZQHFMQvM67Fnuk,54115
5
- wsba_hockey/api/api/index.py,sha256=tABWg5cYCY-fPaNJ6W_bMJKEYrjn93YGy84VlkHzIXA,6853
4
+ wsba_hockey/wsba_main.py,sha256=N5i1y1QtP4jsnsSNKIR_lcAjl_V8oqAlH2YRNTWSUZk,62347
5
+ wsba_hockey/api/api/index.py,sha256=r2keq105Ve8V0JAsSZMIPs9geVHX2Fuxyi4MqnzCt48,6537
6
6
  wsba_hockey/evidence/weakside-breakout/node_modules/duckdb/vendor.py,sha256=lmu0TB0rIYkAuV9-csFJgW-1hJojso_-EZpEoorUUKM,4949
7
7
  wsba_hockey/evidence/weakside-breakout/node_modules/flatted/python/flatted.py,sha256=ke8FuEflns-WlphCcQ9CC0qJqWqX3zEEuak74o6rgE8,3879
8
8
  wsba_hockey/evidence/weakside-breakout/node_modules/flatted/python/test.py,sha256=uTOn6HJd7KeY_PTRvvufv60dmvON3KWp3nnqACj8IlA,2129
@@ -132,17 +132,17 @@ wsba_hockey/evidence/weakside-breakout/wsba_nhl_apps/wsba_nhl_apps/team_heatmaps
132
132
  wsba_hockey/evidence/weakside-breakout/wsba_nhl_apps/wsba_nhl_apps/team_heatmaps/rink_plot.py,sha256=RB_csrnTxlFR4OyFEhZXbHDSR3dP-KgME0xGBR2JE-4,11994
133
133
  wsba_hockey/flask/app.py,sha256=J51iA65h9xyJfLgdH0h2sVSbfIR7xgGd2Oy8bJsmpAk,1873
134
134
  wsba_hockey/tools/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
135
- wsba_hockey/tools/agg.py,sha256=0tO-clSQBloG4YHXuaqFqllew3mU95YU6torJAIOZmM,23320
135
+ wsba_hockey/tools/agg.py,sha256=OkIYd-ApvGVYe2JJLOI21jnDIN5LH8nkeH7eo0reWFI,23364
136
136
  wsba_hockey/tools/plotting.py,sha256=81hBaM7tcwUNB4-tovPn7QreOUz6B2NuI_SR4-djVSk,6029
137
- wsba_hockey/tools/scraping.py,sha256=h6C016U0qmNQpHWMh7Xvn3ud57zKzRbRQ06Odl-rC_I,52573
137
+ wsba_hockey/tools/scraping.py,sha256=-sv29886AWAMhhpJ14282WTolBZni8eXBvj4OtNVY-U,52863
138
138
  wsba_hockey/tools/xg_model.py,sha256=nOr_2RBijLgPmJ0TTs4wbSsORYmRqWCKRjLKDm7sAhI,18342
139
139
  wsba_hockey/tools/archive/old_scraping.py,sha256=hEjMI1RtfeZnf0RBiJFI38oXkLZ3WofeH5xqcF4pzgM,49585
140
140
  wsba_hockey/tools/utils/__init__.py,sha256=vccXhOtzARoR99fmEWU1OEI3qCIdQ9Z42AlRA_BUhrs,114
141
141
  wsba_hockey/tools/utils/config.py,sha256=D3Uk05-YTyrhfReMTTLfNI3HN_rON2uo_CDE9oER3Lg,351
142
142
  wsba_hockey/tools/utils/save_pages.py,sha256=CsyL_0n-b-4pJoUauwU3HpnCO6n69-RlBMJQBd_qGDc,4979
143
143
  wsba_hockey/tools/utils/shared.py,sha256=dH_JwZfia5fib8rksy5sW-mBp0pluBPvw37Vdr8Kap0,14211
144
- wsba_hockey-1.1.5.dist-info/licenses/LICENSE,sha256=Nr_Um1Pd5FQJTWWgm7maZArdtYMbDhzXYSwyJIZDGik,1114
145
- wsba_hockey-1.1.5.dist-info/METADATA,sha256=nvE9FZLlmofZ-X6LG2RM4dYGbZWDbIsbafZbaDrZ6qk,3566
146
- wsba_hockey-1.1.5.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
147
- wsba_hockey-1.1.5.dist-info/top_level.txt,sha256=acU7s3x-RZC1zGiqCOmO0g267iqCg34lzIfdmYxxGmQ,12
148
- wsba_hockey-1.1.5.dist-info/RECORD,,
144
+ wsba_hockey-1.1.7.dist-info/licenses/LICENSE,sha256=Nr_Um1Pd5FQJTWWgm7maZArdtYMbDhzXYSwyJIZDGik,1114
145
+ wsba_hockey-1.1.7.dist-info/METADATA,sha256=O_B4EEwc9nbOpnAO8KVoA1Vv-mJHIUEuyqNP_OLrx7s,3566
146
+ wsba_hockey-1.1.7.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
147
+ wsba_hockey-1.1.7.dist-info/top_level.txt,sha256=acU7s3x-RZC1zGiqCOmO0g267iqCg34lzIfdmYxxGmQ,12
148
+ wsba_hockey-1.1.7.dist-info/RECORD,,