wsba-hockey 1.2.0__tar.gz → 1.2.2__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {wsba_hockey-1.2.0/src/wsba_hockey.egg-info → wsba_hockey-1.2.2}/PKG-INFO +1 -1
- {wsba_hockey-1.2.0 → wsba_hockey-1.2.2}/pyproject.toml +1 -1
- {wsba_hockey-1.2.0 → wsba_hockey-1.2.2}/src/wsba_hockey/__init__.py +1 -0
- wsba_hockey-1.2.2/src/wsba_hockey/tools/game_pred.py +33 -0
- {wsba_hockey-1.2.0 → wsba_hockey-1.2.2}/src/wsba_hockey/wsba_main.py +48 -22
- {wsba_hockey-1.2.0 → wsba_hockey-1.2.2/src/wsba_hockey.egg-info}/PKG-INFO +1 -1
- {wsba_hockey-1.2.0 → wsba_hockey-1.2.2}/src/wsba_hockey.egg-info/SOURCES.txt +1 -0
- {wsba_hockey-1.2.0 → wsba_hockey-1.2.2}/LICENSE +0 -0
- {wsba_hockey-1.2.0 → wsba_hockey-1.2.2}/README.md +0 -0
- {wsba_hockey-1.2.0 → wsba_hockey-1.2.2}/setup.cfg +0 -0
- {wsba_hockey-1.2.0 → wsba_hockey-1.2.2}/src/wsba_hockey/tools/__init__.py +0 -0
- {wsba_hockey-1.2.0 → wsba_hockey-1.2.2}/src/wsba_hockey/tools/agg.py +0 -0
- {wsba_hockey-1.2.0 → wsba_hockey-1.2.2}/src/wsba_hockey/tools/archive/old_scraping.py +0 -0
- {wsba_hockey-1.2.0 → wsba_hockey-1.2.2}/src/wsba_hockey/tools/plotting.py +0 -0
- {wsba_hockey-1.2.0 → wsba_hockey-1.2.2}/src/wsba_hockey/tools/scraping.py +0 -0
- {wsba_hockey-1.2.0 → wsba_hockey-1.2.2}/src/wsba_hockey/tools/utils/__init__.py +0 -0
- {wsba_hockey-1.2.0 → wsba_hockey-1.2.2}/src/wsba_hockey/tools/utils/shared.py +0 -0
- {wsba_hockey-1.2.0 → wsba_hockey-1.2.2}/src/wsba_hockey/tools/xg_model.py +0 -0
- {wsba_hockey-1.2.0 → wsba_hockey-1.2.2}/src/wsba_hockey.egg-info/dependency_links.txt +0 -0
- {wsba_hockey-1.2.0 → wsba_hockey-1.2.2}/src/wsba_hockey.egg-info/top_level.txt +0 -0
- {wsba_hockey-1.2.0 → wsba_hockey-1.2.2}/tests/tests.py +0 -0
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.4
|
2
2
|
Name: wsba_hockey
|
3
|
-
Version: 1.2.0
|
3
|
+
Version: 1.2.2
|
4
4
|
Summary: WeakSide Breakout's complete Python package of access to hockey data, primairly including the scraping of National Hockey League schedule, play-by-play, and shifts information.
|
5
5
|
Author-email: Owen Singh <owenbksingh@gmail.com>
|
6
6
|
Project-URL: Homepage, https://github.com/owensingh38/wsba_hockey/
|
@@ -0,0 +1,33 @@
|
|
1
|
+
import joblib
|
2
|
+
import os
|
3
|
+
import pandas as pd
|
4
|
+
import numpy as np
|
5
|
+
import xgboost as xgb
|
6
|
+
import scipy.sparse as sp
|
7
|
+
import wsba_hockey.wsba_main as wsba
|
8
|
+
import wsba_hockey.tools.scraping as scraping
|
9
|
+
import matplotlib.pyplot as plt
|
10
|
+
from sklearn.calibration import calibration_curve
|
11
|
+
from sklearn.metrics import roc_curve, auc
|
12
|
+
|
13
|
+
### GAME PREDICTION MODEL FUNCTIONS ###
|
14
|
+
# Provided in this file are functions vital to the game prediction model in the WSBA Hockey Python package. #
|
15
|
+
|
16
|
+
## GLOBAL VARIABLES ##
|
17
|
+
dir = os.path.dirname(os.path.realpath(__file__))
|
18
|
+
roster_path = os.path.join(dir,'rosters\\nhl_rosters.csv')
|
19
|
+
schedule_path = os.path.join(dir,'schedule/schedule.csv')
|
20
|
+
|
21
|
+
def prep_game_data(pbp):
|
22
|
+
#Prepare schedule data for model development given full-season pbp
|
23
|
+
|
24
|
+
#Calculate necessary team stats (by game) for the prediction model
|
25
|
+
#The model will evaluate based on three different qualities for valid EV, PP, and SH strength
|
26
|
+
dfs = []
|
27
|
+
for strength in [['5v5'],['5v4'],['4v5']]:
|
28
|
+
team_games = wsba.nhl_calculate_stats(pbp,'team',[2,3],strength,True)
|
29
|
+
team_games['Year'] = team_games['Season'].str[0:4].astype(int)
|
30
|
+
dfs.append(team_games)
|
31
|
+
|
32
|
+
#Place the games in order and create sums for
|
33
|
+
df = pd.concat(dfs).sort_values(by=['Year','Game'])
|
@@ -100,14 +100,12 @@ KNOWN_PROBS = {
|
|
100
100
|
2009020885:'Missing shifts data for game between Sharks and Blue Jackets.',
|
101
101
|
2010020124:'Game between Capitals and Hurricanes is sporadically missing player on-ice data',
|
102
102
|
2012020018:'HTML events contain mislabeled events.',
|
103
|
-
2013020971:'On March 10th, 2014, Stars forward Rich Peverley suffered from a cardiac episode midgame and as a result, the remainder of the game was postponed. \nThe game resumed on April 9th, and the only goal scorer in the game, Blue Jackets forward Nathan Horton, did not appear in the resumed game due to injury. Interestingly, Horton would never play in the NHL again.',
|
104
103
|
2018021133:'Game between Lightning and Capitals has incorrectly labeled event teams (i.e. WSH TAKEAWAY - #71 CIRELLI (Cirelli is a Tampa Bay skater in this game)).',
|
105
|
-
2019020876:'Due to the frightening collapse of Blues defensemen Jay Bouwmeester, a game on February 2nd, 2020 between the Ducks and Blues was postponed. \nWhen the game resumed, Ducks defensemen Hampus Lindholm, who assisted on a goal in the inital game, did not play in the resumed match.'
|
106
104
|
}
|
107
105
|
|
108
106
|
SHOT_TYPES = ['wrist','deflected','tip-in','slap','backhand','snap','wrap-around','poke','bat','cradle','between-legs']
|
109
107
|
|
110
|
-
NEW =
|
108
|
+
NEW = 2025
|
111
109
|
|
112
110
|
EVENTS = ['faceoff','hit','giveaway','takeaway','blocked-shot','missed-shot','shot-on-goal','goal','penalty']
|
113
111
|
|
@@ -461,14 +459,14 @@ def nhl_scrape_seasons_info(seasons:list[int] = []):
|
|
461
459
|
else:
|
462
460
|
return df.sort_values(by=['id'])
|
463
461
|
|
464
|
-
def nhl_scrape_standings(arg:str | int = "now", season_type:int = 2):
|
462
|
+
def nhl_scrape_standings(arg:int | list[int] | Literal['now'] = 'now', season_type:int = 2):
|
465
463
|
"""
|
466
464
|
Returns standings or playoff bracket
|
467
465
|
Args:
|
468
|
-
arg (
|
469
|
-
Date formatted as 'YYYY-MM-DD' to scrape standings, NHL season such as "20242025", or 'now' for current standings. Default is 'now'.
|
466
|
+
arg (int or list[int] or str, optional):
|
467
|
+
Date formatted as 'YYYY-MM-DD' to scrape standings, NHL season such as "20242025", list of NHL seasons, or 'now' for current standings. Default is 'now'.
|
470
468
|
season_type (int, optional):
|
471
|
-
Part of season to scrape. If 3 (playoffs) then scrape the playoff bracket for the season implied by arg. When arg = 'now' this is
|
469
|
+
Part of season to scrape. If 3 (playoffs) then scrape the playoff bracket for the season implied by arg. When arg = 'now' this is defaulted to the most recent playoff year. Any dates passed through are parsed as seasons. Default is 2.
|
472
470
|
|
473
471
|
Returns:
|
474
472
|
pd.DataFrame:
|
@@ -477,31 +475,59 @@ def nhl_scrape_standings(arg:str | int = "now", season_type:int = 2):
|
|
477
475
|
|
478
476
|
if season_type == 3:
|
479
477
|
if arg == "now":
|
480
|
-
arg = NEW
|
478
|
+
arg = [NEW]
|
479
|
+
elif type(arg) == int:
|
480
|
+
#Find year from season
|
481
|
+
arg = [str(arg)[4:8]]
|
482
|
+
elif type(arg) == list:
|
483
|
+
#Find year from seasons
|
484
|
+
arg = [str(s)[4:8] for s in arg]
|
485
|
+
else:
|
486
|
+
#Find year from season from date
|
487
|
+
arg = [int(arg[0:4])+1 if (9 < int(arg[5:7]) < 13) else int(arg[0:4])]
|
481
488
|
|
482
|
-
print(f"Scraping playoff bracket for
|
483
|
-
|
489
|
+
print(f"Scraping playoff bracket for season{'s' if len(arg)>1 else ''}: {arg}")
|
490
|
+
|
491
|
+
dfs = []
|
492
|
+
for season in arg:
|
493
|
+
api = f"https://api-web.nhle.com/v1/playoff-bracket/{season}"
|
484
494
|
|
485
|
-
|
495
|
+
data = rs.get(api).json()['series']
|
496
|
+
dfs.append(pd.json_normalize(data))
|
486
497
|
|
487
|
-
|
498
|
+
#Return: playoff bracket
|
499
|
+
return pd.concat(dfs)
|
488
500
|
|
489
501
|
else:
|
490
502
|
if arg == "now":
|
491
503
|
print("Scraping standings as of now...")
|
504
|
+
arg = [arg]
|
492
505
|
elif arg in SEASONS:
|
493
506
|
print(f'Scraping standings for season: {arg}')
|
507
|
+
arg = [arg]
|
508
|
+
elif type(arg) == list:
|
509
|
+
print(f'Scraping standings for seasons: {arg}')
|
494
510
|
else:
|
495
511
|
print(f"Scraping standings for date: {arg}")
|
512
|
+
arg = [arg]
|
513
|
+
|
514
|
+
dfs = []
|
515
|
+
for search in arg:
|
516
|
+
#If the end is an int then its a season otherwise it is either 'now' or a date as a string
|
517
|
+
if type(search) == int:
|
518
|
+
season_data = rs.get('https://api.nhle.com/stats/rest/en/season').json()['data']
|
519
|
+
season_data = [s for s in season_data if s['id'] == search][0]
|
520
|
+
end = season_data['regularSeasonEndDate'][0:10]
|
521
|
+
else:
|
522
|
+
end = search
|
523
|
+
|
524
|
+
api = f"https://api-web.nhle.com/v1/standings/{end}"
|
496
525
|
|
497
|
-
|
498
|
-
|
499
|
-
end = season_data['regularSeasonEndDate'][0:10]
|
500
|
-
|
501
|
-
api = f"https://api-web.nhle.com/v1/standings/{end}"
|
502
|
-
data = rs.get(api).json()['standings']
|
526
|
+
data = rs.get(api).json()['standings']
|
527
|
+
dfs.append(pd.json_normalize(data))
|
503
528
|
|
504
|
-
|
529
|
+
#Return: standings data
|
530
|
+
return pd.concat(dfs)
|
505
531
|
|
506
532
|
def nhl_scrape_roster(season: int):
|
507
533
|
"""
|
@@ -629,7 +655,7 @@ def nhl_scrape_player_data(player_ids:list[int]):
|
|
629
655
|
else:
|
630
656
|
return pd.DataFrame()
|
631
657
|
|
632
|
-
def nhl_scrape_draft_rankings(arg:str = 'now', category:int = 0):
|
658
|
+
def nhl_scrape_draft_rankings(arg:str | Literal['now'] = 'now', category:int = 0):
|
633
659
|
"""
|
634
660
|
Returns draft rankings
|
635
661
|
Args:
|
@@ -1165,7 +1191,7 @@ def nhl_calculate_stats(pbp:pd.DataFrame, type:Literal['skater','goalie','team']
|
|
1165
1191
|
'birthCountry':'Nationality'})
|
1166
1192
|
|
1167
1193
|
#WSBA
|
1168
|
-
complete['WSBA'] = complete['
|
1194
|
+
complete['WSBA'] = complete['ID'].astype(str).str.replace('.0','')+complete['Team']+complete['Season'].astype(str)
|
1169
1195
|
|
1170
1196
|
#Add player age
|
1171
1197
|
complete['Birthday'] = pd.to_datetime(complete['Birthday'])
|
@@ -1306,7 +1332,7 @@ def nhl_calculate_stats(pbp:pd.DataFrame, type:Literal['skater','goalie','team']
|
|
1306
1332
|
#Remove goalies that occasionally appear in a set
|
1307
1333
|
complete = complete.loc[complete['Position']!='G']
|
1308
1334
|
#Add WSBA ID
|
1309
|
-
complete['WSBA'] = complete['
|
1335
|
+
complete['WSBA'] = complete['ID'].astype(str).str.replace('.0','')+complete['Season'].astype(str)+complete['Team']
|
1310
1336
|
|
1311
1337
|
#Add per 60 stats
|
1312
1338
|
for stat in PER_SIXTY:
|
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.4
|
2
2
|
Name: wsba_hockey
|
3
|
-
Version: 1.2.0
|
3
|
+
Version: 1.2.2
|
4
4
|
Summary: WeakSide Breakout's complete Python package of access to hockey data, primairly including the scraping of National Hockey League schedule, play-by-play, and shifts information.
|
5
5
|
Author-email: Owen Singh <owenbksingh@gmail.com>
|
6
6
|
Project-URL: Homepage, https://github.com/owensingh38/wsba_hockey/
|
@@ -9,6 +9,7 @@ src/wsba_hockey.egg-info/dependency_links.txt
|
|
9
9
|
src/wsba_hockey.egg-info/top_level.txt
|
10
10
|
src/wsba_hockey/tools/__init__.py
|
11
11
|
src/wsba_hockey/tools/agg.py
|
12
|
+
src/wsba_hockey/tools/game_pred.py
|
12
13
|
src/wsba_hockey/tools/plotting.py
|
13
14
|
src/wsba_hockey/tools/scraping.py
|
14
15
|
src/wsba_hockey/tools/xg_model.py
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|