wsba-hockey 1.2.0__py3-none-any.whl → 1.2.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
wsba_hockey/__init__.py CHANGED
@@ -7,6 +7,7 @@ from wsba_hockey.wsba_main import (
7
7
  nhl_scrape_roster,
8
8
  nhl_scrape_draft_rankings,
9
9
  nhl_scrape_prospects,
10
+ nhl_scrape_team_info,
10
11
  nhl_scrape_game_info,
11
12
  nhl_calculate_stats,
12
13
  nhl_apply_xG,
@@ -0,0 +1,33 @@
1
+ import joblib
2
+ import os
3
+ import pandas as pd
4
+ import numpy as np
5
+ import xgboost as xgb
6
+ import scipy.sparse as sp
7
+ import wsba_hockey.wsba_main as wsba
8
+ import wsba_hockey.tools.scraping as scraping
9
+ import matplotlib.pyplot as plt
10
+ from sklearn.calibration import calibration_curve
11
+ from sklearn.metrics import roc_curve, auc
12
+
13
+ ### GAME PREDICTION MODEL FUNCTIONS ###
14
+ # Provided in this file are functions vital to the game prediction model in the WSBA Hockey Python package. #
15
+
16
+ ## GLOBAL VARIABLES ##
17
+ dir = os.path.dirname(os.path.realpath(__file__))
18
+ roster_path = os.path.join(dir,'rosters\\nhl_rosters.csv')
19
+ schedule_path = os.path.join(dir,'schedule/schedule.csv')
20
+
21
+ def prep_game_data(pbp):
22
+ #Prepare schedule data for model development given full-season pbp
23
+
24
+ #Calculate necessary team stats (by game) for the prediction model
25
+ #The model will evaluate based on three different qualities for valid EV, PP, and SH strength
26
+ dfs = []
27
+ for strength in [['5v5'],['5v4'],['4v5']]:
28
+ team_games = wsba.nhl_calculate_stats(pbp,'team',[2,3],strength,True)
29
+ team_games['Year'] = team_games['Season'].str[0:4].astype(int)
30
+ dfs.append(team_games)
31
+
32
+ #Place the games in order and create sums for
33
+ df = pd.concat(dfs).sort_values(by=['Year','Game'])
wsba_hockey/wsba_main.py CHANGED
@@ -100,14 +100,12 @@ KNOWN_PROBS = {
100
100
  2009020885:'Missing shifts data for game between Sharks and Blue Jackets.',
101
101
  2010020124:'Game between Capitals and Hurricanes is sporadically missing player on-ice data',
102
102
  2012020018:'HTML events contain mislabeled events.',
103
- 2013020971:'On March 10th, 2014, Stars forward Rich Peverley suffered from a cardiac episode midgame and as a result, the remainder of the game was postponed. \nThe game resumed on April 9th, and the only goal scorer in the game, Blue Jackets forward Nathan Horton, did not appear in the resumed game due to injury. Interestingly, Horton would never play in the NHL again.',
104
103
  2018021133:'Game between Lightning and Capitals has incorrectly labeled event teams (i.e. WSH TAKEAWAY - #71 CIRELLI (Cirelli is a Tampa Bay skater in this game)).',
105
- 2019020876:'Due to the frightening collapse of Blues defensemen Jay Bouwmeester, a game on February 2nd, 2020 between the Ducks and Blues was postponed. \nWhen the game resumed, Ducks defensemen Hampus Lindholm, who assisted on a goal in the inital game, did not play in the resumed match.'
106
104
  }
107
105
 
108
106
  SHOT_TYPES = ['wrist','deflected','tip-in','slap','backhand','snap','wrap-around','poke','bat','cradle','between-legs']
109
107
 
110
- NEW = 2024
108
+ NEW = 2025
111
109
 
112
110
  EVENTS = ['faceoff','hit','giveaway','takeaway','blocked-shot','missed-shot','shot-on-goal','goal','penalty']
113
111
 
@@ -461,14 +459,14 @@ def nhl_scrape_seasons_info(seasons:list[int] = []):
461
459
  else:
462
460
  return df.sort_values(by=['id'])
463
461
 
464
- def nhl_scrape_standings(arg:str | int = "now", season_type:int = 2):
462
+ def nhl_scrape_standings(arg:int | list[int] | Literal['now'] = 'now', season_type:int = 2):
465
463
  """
466
464
  Returns standings or playoff bracket
467
465
  Args:
468
- arg (str or int, optional):
469
- Date formatted as 'YYYY-MM-DD' to scrape standings, NHL season such as "20242025", or 'now' for current standings. Default is 'now'.
466
+ arg (int or list[int] or str, optional):
467
+ Date formatted as 'YYYY-MM-DD' to scrape standings, NHL season such as "20242025", list of NHL seasons, or 'now' for current standings. Default is 'now'.
470
468
  season_type (int, optional):
471
- Part of season to scrape. If 3 (playoffs) then scrape the playoff bracket for the season implied by arg. When arg = 'now' this is ignored. Default is 2.
469
+ Part of season to scrape. If 3 (playoffs) then scrape the playoff bracket for the season implied by arg. When arg = 'now' this defaults to the most recent playoff year. Any dates passed in are parsed as seasons. Default is 2.
472
470
 
473
471
  Returns:
474
472
  pd.DataFrame:
@@ -477,31 +475,59 @@ def nhl_scrape_standings(arg:str | int = "now", season_type:int = 2):
477
475
 
478
476
  if season_type == 3:
479
477
  if arg == "now":
480
- arg = NEW
478
+ arg = [NEW]
479
+ elif type(arg) == int:
480
+ #Find year from season
481
+ arg = [str(arg)[4:8]]
482
+ elif type(arg) == list:
483
+ #Find year from seasons
484
+ arg = [str(s)[4:8] for s in arg]
485
+ else:
486
+ #Find year from season from date
487
+ arg = [int(arg[0:4])+1 if (9 < int(arg[5:7]) < 13) else int(arg[0:4])]
481
488
 
482
- print(f"Scraping playoff bracket for date: {arg}")
483
- api = f"https://api-web.nhle.com/v1/playoff-bracket/{arg}"
489
+ print(f"Scraping playoff bracket for season{'s' if len(arg)>1 else ''}: {arg}")
490
+
491
+ dfs = []
492
+ for season in arg:
493
+ api = f"https://api-web.nhle.com/v1/playoff-bracket/{season}"
484
494
 
485
- data = rs.get(api).json()['series']
495
+ data = rs.get(api).json()['series']
496
+ dfs.append(pd.json_normalize(data))
486
497
 
487
- return pd.json_normalize(data)
498
+ #Return: playoff bracket
499
+ return pd.concat(dfs)
488
500
 
489
501
  else:
490
502
  if arg == "now":
491
503
  print("Scraping standings as of now...")
504
+ arg = [arg]
492
505
  elif arg in SEASONS:
493
506
  print(f'Scraping standings for season: {arg}')
507
+ arg = [arg]
508
+ elif type(arg) == list:
509
+ print(f'Scraping standings for seasons: {arg}')
494
510
  else:
495
511
  print(f"Scraping standings for date: {arg}")
512
+ arg = [arg]
513
+
514
+ dfs = []
515
+ for search in arg:
516
+ #If the search value is an int then it is a season; otherwise it is either 'now' or a date as a string
517
+ if type(search) == int:
518
+ season_data = rs.get('https://api.nhle.com/stats/rest/en/season').json()['data']
519
+ season_data = [s for s in season_data if s['id'] == search][0]
520
+ end = season_data['regularSeasonEndDate'][0:10]
521
+ else:
522
+ end = search
523
+
524
+ api = f"https://api-web.nhle.com/v1/standings/{end}"
496
525
 
497
- season_data = rs.get('https://api.nhle.com/stats/rest/en/season').json()['data']
498
- season_data = [s for s in season_data if s['id'] == arg][0]
499
- end = season_data['regularSeasonEndDate'][0:10]
500
-
501
- api = f"https://api-web.nhle.com/v1/standings/{end}"
502
- data = rs.get(api).json()['standings']
526
+ data = rs.get(api).json()['standings']
527
+ dfs.append(pd.json_normalize(data))
503
528
 
504
- return pd.json_normalize(data)
529
+ #Return: standings data
530
+ return pd.concat(dfs)
505
531
 
506
532
  def nhl_scrape_roster(season: int):
507
533
  """
@@ -629,7 +655,7 @@ def nhl_scrape_player_data(player_ids:list[int]):
629
655
  else:
630
656
  return pd.DataFrame()
631
657
 
632
- def nhl_scrape_draft_rankings(arg:str = 'now', category:int = 0):
658
+ def nhl_scrape_draft_rankings(arg:str | Literal['now'] = 'now', category:int = 0):
633
659
  """
634
660
  Returns draft rankings
635
661
  Args:
@@ -1165,7 +1191,7 @@ def nhl_calculate_stats(pbp:pd.DataFrame, type:Literal['skater','goalie','team']
1165
1191
  'birthCountry':'Nationality'})
1166
1192
 
1167
1193
  #WSBA
1168
- complete['WSBA'] = complete['Goalie']+complete['Team']+complete['Season'].astype(str)
1194
+ complete['WSBA'] = complete['ID'].astype(str).str.replace('.0','')+complete['Team']+complete['Season'].astype(str)
1169
1195
 
1170
1196
  #Add player age
1171
1197
  complete['Birthday'] = pd.to_datetime(complete['Birthday'])
@@ -1306,7 +1332,7 @@ def nhl_calculate_stats(pbp:pd.DataFrame, type:Literal['skater','goalie','team']
1306
1332
  #Remove goalies that occasionally appear in a set
1307
1333
  complete = complete.loc[complete['Position']!='G']
1308
1334
  #Add WSBA ID
1309
- complete['WSBA'] = complete['Player']+complete['Season'].astype(str)+complete['Team']
1335
+ complete['WSBA'] = complete['ID'].astype(str).str.replace('.0','')+complete['Season'].astype(str)+complete['Team']
1310
1336
 
1311
1337
  #Add per 60 stats
1312
1338
  for stat in PER_SIXTY:
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: wsba_hockey
3
- Version: 1.2.0
3
+ Version: 1.2.2
4
4
  Summary: WeakSide Breakout's complete Python package of access to hockey data, primarily including the scraping of National Hockey League schedule, play-by-play, and shifts information.
5
5
  Author-email: Owen Singh <owenbksingh@gmail.com>
6
6
  Project-URL: Homepage, https://github.com/owensingh38/wsba_hockey/
@@ -1,15 +1,16 @@
1
- wsba_hockey/__init__.py,sha256=rXczecTEm4W8F9RbFmIttxpX9aqiq7PF5KNDLQVY35w,539
2
- wsba_hockey/wsba_main.py,sha256=ELWCYcXCKtKb8Ha_kKQ9_sBJsgk4_M2Y3pw45ZQJjsM,73567
1
+ wsba_hockey/__init__.py,sha256=4wdj-GjqGGb3BnnyLlvRXYS7wNoaLAzkfVnz6kM8v7g,566
2
+ wsba_hockey/wsba_main.py,sha256=iXh4OSgqLLuxBV-5WthQ-2ISESo3qusm9InM_K7KQKc,74170
3
3
  wsba_hockey/tools/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
4
4
  wsba_hockey/tools/agg.py,sha256=OkIYd-ApvGVYe2JJLOI21jnDIN5LH8nkeH7eo0reWFI,23364
5
+ wsba_hockey/tools/game_pred.py,sha256=OGh6o1vIcyLUixU80hOO0RPGNmDSY1cvvCNZFcP0wL4,1308
5
6
  wsba_hockey/tools/plotting.py,sha256=81hBaM7tcwUNB4-tovPn7QreOUz6B2NuI_SR4-djVSk,6029
6
7
  wsba_hockey/tools/scraping.py,sha256=6_GyF8o56fuijTosm4x4OSrvpL61ZygluK2A26XajqU,52246
7
8
  wsba_hockey/tools/xg_model.py,sha256=OqSvr1Er3zGaY6ZTBnuulBTPLO6CPhNk97SwpnkRD6M,18686
8
9
  wsba_hockey/tools/archive/old_scraping.py,sha256=hEjMI1RtfeZnf0RBiJFI38oXkLZ3WofeH5xqcF4pzgM,49585
9
10
  wsba_hockey/tools/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
10
11
  wsba_hockey/tools/utils/shared.py,sha256=KxeQVttGem73yncAlnuZvTclqpJpoerTKtLusRh5zsk,2472
11
- wsba_hockey-1.2.0.dist-info/licenses/LICENSE,sha256=Nr_Um1Pd5FQJTWWgm7maZArdtYMbDhzXYSwyJIZDGik,1114
12
- wsba_hockey-1.2.0.dist-info/METADATA,sha256=isZ9jsdKaJ0fSgtFNy65TUIyKeNbWX6pjekjzsYEgPk,3592
13
- wsba_hockey-1.2.0.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
14
- wsba_hockey-1.2.0.dist-info/top_level.txt,sha256=acU7s3x-RZC1zGiqCOmO0g267iqCg34lzIfdmYxxGmQ,12
15
- wsba_hockey-1.2.0.dist-info/RECORD,,
12
+ wsba_hockey-1.2.2.dist-info/licenses/LICENSE,sha256=Nr_Um1Pd5FQJTWWgm7maZArdtYMbDhzXYSwyJIZDGik,1114
13
+ wsba_hockey-1.2.2.dist-info/METADATA,sha256=8lZANGgWtAMVI7QfEr2cP66kM8WzUXv-YYeV6z1IL4A,3592
14
+ wsba_hockey-1.2.2.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
15
+ wsba_hockey-1.2.2.dist-info/top_level.txt,sha256=acU7s3x-RZC1zGiqCOmO0g267iqCg34lzIfdmYxxGmQ,12
16
+ wsba_hockey-1.2.2.dist-info/RECORD,,