wsba-hockey 1.1.5__py3-none-any.whl → 1.1.6__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- wsba_hockey/tools/agg.py +1 -0
- wsba_hockey/wsba_main.py +35 -38
- {wsba_hockey-1.1.5.dist-info → wsba_hockey-1.1.6.dist-info}/METADATA +1 -1
- {wsba_hockey-1.1.5.dist-info → wsba_hockey-1.1.6.dist-info}/RECORD +7 -7
- {wsba_hockey-1.1.5.dist-info → wsba_hockey-1.1.6.dist-info}/WHEEL +0 -0
- {wsba_hockey-1.1.5.dist-info → wsba_hockey-1.1.6.dist-info}/licenses/LICENSE +0 -0
- {wsba_hockey-1.1.5.dist-info → wsba_hockey-1.1.6.dist-info}/top_level.txt +0 -0
wsba_hockey/tools/agg.py
CHANGED
wsba_hockey/wsba_main.py
CHANGED
@@ -13,7 +13,7 @@ from wsba_hockey.tools.plotting import *
|
|
13
13
|
## Provided below are all integral functions in the WSBA Hockey Python package. ##
|
14
14
|
|
15
15
|
## GLOBAL VARIABLES ##
|
16
|
-
|
16
|
+
SEASONS = [
|
17
17
|
'20072008',
|
18
18
|
'20082009',
|
19
19
|
'20092010',
|
@@ -34,7 +34,7 @@ seasons = [
|
|
34
34
|
'20242025'
|
35
35
|
]
|
36
36
|
|
37
|
-
|
37
|
+
CONVERT_SEASONS = {'2007': '20072008',
|
38
38
|
'2008': '20082009',
|
39
39
|
'2009': '20092010',
|
40
40
|
'2010': '20102011',
|
@@ -53,17 +53,17 @@ convert_seasons = {'2007': '20072008',
|
|
53
53
|
'2023': '20232024',
|
54
54
|
'2024': '20242025'}
|
55
55
|
|
56
|
-
|
56
|
+
CONVERT_TEAM_ABBR = {'L.A':'LAK',
|
57
57
|
'N.J':'NJD',
|
58
58
|
'S.J':'SJS',
|
59
59
|
'T.B':'TBL',
|
60
60
|
'PHX':'ARI'}
|
61
61
|
|
62
|
-
|
62
|
+
#Stat columns that receive a per-60-minute ('<stat>/60') counterpart.
#NOTE: the order of this list is significant - code in this module slices it by position:
#   PER_SIXTY[:3]  -> 'Fi','xGi','Gi', combined with each shot type to name the shot-type metric columns
#   PER_SIXTY[11:] -> 'FF' onward, used by the loops that skip the first eleven entries (presumably the team-level stats - confirm)
#Insert or reorder entries only after checking those slices.
PER_SIXTY = ['Fi','xGi','Gi','A1','A2','P1','P','Si','OZF','NZF','DZF','FF','FA','xGF','xGA','GF','GA','SF','SA','CF','CA','HF','HA','Give','Take','Penl','Penl2','Penl5','Draw','Block','GSAx']
|
63
63
|
|
64
64
|
#Some games in the API are specifically known to cause errors in scraping.
|
65
65
|
#This list is updated as frequently as necessary
|
66
|
-
|
66
|
+
KNOWN_PROBS = {
|
67
67
|
'2007020011':'Missing shifts data for game between Chicago and Minnesota.',
|
68
68
|
'2007021178':'Game between the Bruins and Sabres is missing data after the second period, for some reason.',
|
69
69
|
'2008020259':'HTML data is completely missing for this game.',
|
@@ -79,11 +79,11 @@ known_probs = {
|
|
79
79
|
'2019020876':'Due to the frightening collapse of Blues defensemen Jay Bouwmeester, a game on February 2nd, 2020 between the Ducks and Blues was postponed. \nWhen the game resumed, Ducks defensemen Hampus Lindholm, who assisted on a goal in the inital game, did not play in the resumed match.'
|
80
80
|
}
|
81
81
|
|
82
|
-
|
82
|
+
SHOT_TYPES = ['wrist','deflected','tip-in','slap','backhand','snap','wrap-around','poke','bat','cradle','between-legs']
|
83
83
|
|
84
|
-
|
84
|
+
#Value substituted for arg in nhl_scrape_standings when the playoff bracket (season_type 3) is requested with arg = "now"
NEW = 2024
|
85
85
|
|
86
|
-
|
86
|
+
STANDINGS_END = {
|
87
87
|
'20072008':'04-06',
|
88
88
|
'20082009':'04-12',
|
89
89
|
'20092010':'04-11',
|
@@ -104,12 +104,12 @@ standings_end = {
|
|
104
104
|
'20242025':'04-17'
|
105
105
|
}
|
106
106
|
|
107
|
-
|
107
|
+
EVENTS = ['faceoff','hit','giveaway','takeaway','blocked-shot','missed-shot','shot-on-goal','goal','penalty']
|
108
108
|
|
109
|
-
|
110
|
-
|
111
|
-
|
112
|
-
|
109
|
+
#Directory containing this module file (realpath resolves any symlinks first); base for the data-file paths defined below
DIR = os.path.dirname(os.path.realpath(__file__))
|
110
|
+
#Schedule CSV read by repo_load_schedule and used as the default local_path of nhl_scrape_season.
#Each path component is passed to os.path.join separately so the separator matches the host OS;
#a literal '\\' inside one component only forms a valid path on Windows.
SCHEDULE_PATH = os.path.join(DIR,'tools','schedule','schedule.csv')
|
111
|
+
#Team information CSV read by repo_load_teaminfo.
#Each path component is passed to os.path.join separately so the separator matches the host OS;
#a literal '\\' inside one component only forms a valid path on Windows.
INFO_PATH = os.path.join(DIR,'tools','teaminfo','nhl_teaminfo.csv')
|
112
|
+
#Roster CSV read by repo_load_rosters and used as the default roster_path of nhl_calculate_stats.
#Each path component is passed to os.path.join separately so the separator matches the host OS;
#a literal '\\' inside one component only forms a valid path on Windows.
DEFAULT_ROSTER = os.path.join(DIR,'tools','rosters','nhl_rosters.csv')
|
113
113
|
|
114
114
|
## SCRAPE FUNCTIONS ##
|
115
115
|
def nhl_scrape_game(game_ids,split_shifts = False, remove = ['period-start','period-end','challenge','stoppage','shootout-complete','game-end'],verbose = False, sources = False, errors = False):
|
@@ -189,8 +189,8 @@ def nhl_scrape_game(game_ids,split_shifts = False, remove = ['period-start','per
|
|
189
189
|
except:
|
190
190
|
#Games such as the all-star game and pre-season games will incur this error
|
191
191
|
#Other games have known problems
|
192
|
-
if game_id in
|
193
|
-
print(f"\nGame {game_id} has a known problem: {
|
192
|
+
if game_id in KNOWN_PROBS.keys():
|
193
|
+
print(f"\nGame {game_id} has a known problem: {KNOWN_PROBS[game_id]}")
|
194
194
|
else:
|
195
195
|
print(f"\nUnable to scrape game {game_id}. Ensure the ID is properly inputted and formatted.")
|
196
196
|
|
@@ -302,7 +302,7 @@ def nhl_scrape_schedule(season,start = "09-01", end = "08-01"):
|
|
302
302
|
#Return: specified schedule data
|
303
303
|
return df
|
304
304
|
|
305
|
-
def nhl_scrape_season(season,split_shifts = False, season_types = [2,3], remove = ['period-start','period-end','game-end','challenge','stoppage'], start = "09-01", end = "08-01", local=False, local_path =
|
305
|
+
def nhl_scrape_season(season,split_shifts = False, season_types = [2,3], remove = ['period-start','period-end','game-end','challenge','stoppage'], start = "09-01", end = "08-01", local=False, local_path = SCHEDULE_PATH, verbose = False, sources = False, errors = False):
|
306
306
|
#Given season, scrape all play-by-play occurring within the season
|
307
307
|
# param 'season' - NHL season to scrape
|
308
308
|
# param 'split_shifts' - boolean which splits pbp and shift events if true
|
@@ -382,7 +382,7 @@ def nhl_scrape_standings(arg = "now", season_type = 2):
|
|
382
382
|
#arg param is ignored when set to "now" if season_type param is 3
|
383
383
|
if season_type == 3:
|
384
384
|
if arg == "now":
|
385
|
-
arg =
|
385
|
+
arg = NEW
|
386
386
|
|
387
387
|
print(f"Scraping playoff bracket for date: {arg}")
|
388
388
|
api = f"https://api-web.nhle.com/v1/playoff-bracket/{arg}"
|
@@ -394,12 +394,12 @@ def nhl_scrape_standings(arg = "now", season_type = 2):
|
|
394
394
|
else:
|
395
395
|
if arg == "now":
|
396
396
|
print("Scraping standings as of now...")
|
397
|
-
elif arg in
|
397
|
+
elif arg in SEASONS:
|
398
398
|
print(f'Scraping standings for season: {arg}')
|
399
399
|
else:
|
400
400
|
print(f"Scraping standings for date: {arg}")
|
401
401
|
|
402
|
-
api = f"https://api-web.nhle.com/v1/standings/{arg[4:8]}-{
|
402
|
+
api = f"https://api-web.nhle.com/v1/standings/{arg[4:8]}-{STANDINGS_END[arg]}"
|
403
403
|
data = rs.get(api).json()['standings']
|
404
404
|
|
405
405
|
return pd.json_normalize(data)
|
@@ -687,7 +687,7 @@ def nhl_shooting_impacts(agg,type):
|
|
687
687
|
pos[f'{group[0]}-FNI-T'] = (pos[f'{group[0]}-FNI']/60)*pos['TOI']
|
688
688
|
|
689
689
|
#Rank per 60 stats
|
690
|
-
for stat in
|
690
|
+
for stat in PER_SIXTY[11:len(PER_SIXTY)]:
|
691
691
|
pos[f'{stat}/60-P'] = pos[f'{stat}/60'].rank(pct=True)
|
692
692
|
|
693
693
|
#Flip percentiles for against stats
|
@@ -788,7 +788,7 @@ def nhl_shooting_impacts(agg,type):
|
|
788
788
|
pos['RushesFi'] = pos['RushFi/60'].rank(pct=True)
|
789
789
|
|
790
790
|
#Rank per 60 stats
|
791
|
-
for stat in
|
791
|
+
for stat in PER_SIXTY:
|
792
792
|
pos[f'{stat}/60-P'] = pos[f'{stat}/60'].rank(pct=True)
|
793
793
|
|
794
794
|
#Flip percentiles for against stats
|
@@ -868,7 +868,7 @@ def nhl_shooting_impacts(agg,type):
|
|
868
868
|
#Return: skater stats with shooting impacts
|
869
869
|
return df
|
870
870
|
|
871
|
-
def nhl_calculate_stats(pbp,type,season_types,game_strength,split_game=False,roster_path=
|
871
|
+
def nhl_calculate_stats(pbp,type,season_types,game_strength,split_game=False,roster_path=DEFAULT_ROSTER,shot_impact=False):
|
872
872
|
#Given play-by-play, seasonal information, game_strength, rosters, and xG model, return aggregated stats
|
873
873
|
# param 'pbp' - play-by-play dataframe
|
874
874
|
# param 'type' - type of stats to calculate ('skater', 'goalie', or 'team')
|
@@ -911,10 +911,11 @@ def nhl_calculate_stats(pbp,type,season_types,game_strength,split_game=False,ros
|
|
911
911
|
complete['TOI'] = complete['TOI']/60
|
912
912
|
|
913
913
|
#Add per 60 stats
|
914
|
-
for stat in ['FF','FA','xGF','xGA','GF','GA','CF','CA','GSAx']:
|
914
|
+
for stat in ['FF','FA','xGF','xGA','GF','GA','SF','SA','CF','CA','GSAx']:
|
915
915
|
complete[f'{stat}/60'] = (complete[stat]/complete['TOI'])*60
|
916
916
|
|
917
917
|
complete['GF%'] = complete['GF']/(complete['GF']+complete['GA'])
|
918
|
+
complete['SF%'] = complete['SF']/(complete['SF']+complete['SA'])
|
918
919
|
complete['xGF%'] = complete['xGF']/(complete['xGF']+complete['xGA'])
|
919
920
|
complete['FF%'] = complete['FF']/(complete['FF']+complete['FA'])
|
920
921
|
complete['CF%'] = complete['CF']/(complete['CF']+complete['CA'])
|
@@ -953,10 +954,6 @@ def nhl_calculate_stats(pbp,type,season_types,game_strength,split_game=False,ros
|
|
953
954
|
#Find player headshot
|
954
955
|
complete['Headshot'] = 'https://assets.nhle.com/mugs/nhl/'+complete['Season'].astype(str)+'/'+complete['Team']+'/'+complete['ID'].astype(int).astype(str)+'.png'
|
955
956
|
|
956
|
-
end = time.perf_counter()
|
957
|
-
length = end-start
|
958
|
-
print(f'...finished in {(length if length <60 else length/60):.2f} {'seconds' if length <60 else 'minutes'}.')
|
959
|
-
|
960
957
|
head = ['Goalie','ID','Game'] if 'Game' in complete.columns else ['Goalie','ID']
|
961
958
|
complete = complete[head+[
|
962
959
|
"Season","Team",'WSBA',
|
@@ -991,7 +988,7 @@ def nhl_calculate_stats(pbp,type,season_types,game_strength,split_game=False,ros
|
|
991
988
|
complete['TOI'] = complete['TOI']/60
|
992
989
|
|
993
990
|
#Add per 60 stats
|
994
|
-
for stat in
|
991
|
+
for stat in PER_SIXTY[11:len(PER_SIXTY)]:
|
995
992
|
complete[f'{stat}/60'] = (complete[stat]/complete['TOI'])*60
|
996
993
|
|
997
994
|
complete['GF%'] = complete['GF']/(complete['GF']+complete['GA'])
|
@@ -1014,7 +1011,7 @@ def nhl_calculate_stats(pbp,type,season_types,game_strength,split_game=False,ros
|
|
1014
1011
|
'Block',
|
1015
1012
|
'RushF','RushA','RushFxG','RushAxG','RushFG','RushAG',
|
1016
1013
|
'GSAx'
|
1017
|
-
]+[f'{stat}/60' for stat in
|
1014
|
+
]+[f'{stat}/60' for stat in PER_SIXTY[11:len(PER_SIXTY)]]]
|
1018
1015
|
#Apply shot impacts if necessary
|
1019
1016
|
if shot_impact:
|
1020
1017
|
complete = nhl_shooting_impacts(complete,'team')
|
@@ -1085,13 +1082,13 @@ def nhl_calculate_stats(pbp,type,season_types,game_strength,split_game=False,ros
|
|
1085
1082
|
complete['WSBA'] = complete['Player']+complete['Season'].astype(str)+complete['Team']
|
1086
1083
|
|
1087
1084
|
#Add per 60 stats
|
1088
|
-
for stat in
|
1085
|
+
for stat in PER_SIXTY:
|
1089
1086
|
complete[f'{stat}/60'] = (complete[stat]/complete['TOI'])*60
|
1090
1087
|
|
1091
1088
|
#Shot Type Metrics
|
1092
1089
|
type_metrics = []
|
1093
1090
|
for type in shot_types:
|
1094
|
-
for stat in
|
1091
|
+
for stat in PER_SIXTY[:3]:
|
1095
1092
|
type_metrics.append(f'{type.capitalize()}{stat}')
|
1096
1093
|
|
1097
1094
|
head = ['Player','ID','Game'] if 'Game' in complete.columns else ['Player','ID']
|
@@ -1101,7 +1098,7 @@ def nhl_calculate_stats(pbp,type,season_types,game_strength,split_game=False,ros
|
|
1101
1098
|
'Height (in)','Weight (lbs)',
|
1102
1099
|
'Birthday','Age','Nationality',
|
1103
1100
|
'GP','TOI',
|
1104
|
-
"Gi","A1","A2",'P1','P','Si','
|
1101
|
+
"Gi","A1","A2",'P1','P','Si','Shi%',
|
1105
1102
|
'Give','Take','PM%','HF','HA','HF%',
|
1106
1103
|
"Fi","xGi",'xGi/Fi',"Gi/xGi","Fshi%",
|
1107
1104
|
"GF","SF","FF","xGF","xGF/FF","GF/xGF","ShF%","FshF%",
|
@@ -1116,7 +1113,7 @@ def nhl_calculate_stats(pbp,type,season_types,game_strength,split_game=False,ros
|
|
1116
1113
|
'OZF','NZF','DZF',
|
1117
1114
|
'OZF%','NZF%','DZF%',
|
1118
1115
|
'GSAx'
|
1119
|
-
]+[f'{stat}/60' for stat in
|
1116
|
+
]+[f'{stat}/60' for stat in PER_SIXTY]+type_metrics].fillna(0).sort_values(['Player','Season','Team','ID'])
|
1120
1117
|
|
1121
1118
|
#Apply shot impacts if necessary (Note: this will remove skaters with fewer than 150 minutes of TOI due to the shot impact TOI rule)
|
1122
1119
|
if shot_impact:
|
@@ -1180,7 +1177,7 @@ def repo_load_rosters(seasons = []):
|
|
1180
1177
|
#Returns roster data from repository
|
1181
1178
|
# param 'seasons' - list of seasons to include
|
1182
1179
|
|
1183
|
-
data = pd.read_csv(
|
1180
|
+
data = pd.read_csv(DEFAULT_ROSTER)
|
1184
1181
|
if len(seasons)>0:
|
1185
1182
|
data = data.loc[data['season'].isin(seasons)]
|
1186
1183
|
|
@@ -1190,7 +1187,7 @@ def repo_load_schedule(seasons = []):
|
|
1190
1187
|
#Returns schedule data from repository
|
1191
1188
|
# param 'seasons' - list of seasons to include
|
1192
1189
|
|
1193
|
-
data = pd.read_csv(
|
1190
|
+
data = pd.read_csv(SCHEDULE_PATH)
|
1194
1191
|
if len(seasons)>0:
|
1195
1192
|
data = data.loc[data['season'].isin(seasons)]
|
1196
1193
|
|
@@ -1199,7 +1196,7 @@ def repo_load_schedule(seasons = []):
|
|
1199
1196
|
def repo_load_teaminfo():
|
1200
1197
|
#Returns team data from repository
|
1201
1198
|
|
1202
|
-
return pd.read_csv(
|
1199
|
+
return pd.read_csv(INFO_PATH)
|
1203
1200
|
|
1204
1201
|
def repo_load_pbp(seasons = []):
|
1205
1202
|
#Returns play-by-play data from repository
|
@@ -1207,11 +1204,11 @@ def repo_load_pbp(seasons = []):
|
|
1207
1204
|
|
1208
1205
|
#Add parquet to total
|
1209
1206
|
print(f'Loading play-by-play from the following seasons: {seasons}...')
|
1210
|
-
dfs = [pd.read_parquet(f"https://weakside-breakout.s3.us-east-2.amazonaws.com/pbp/{season}.parquet") for season in seasons]
|
1207
|
+
dfs = [pd.read_parquet(f"https://weakside-breakout.s3.us-east-2.amazonaws.com/pbp/parquet/nhl_pbp_{season}.parquet") for season in seasons]
|
1211
1208
|
|
1212
1209
|
return pd.concat(dfs)
|
1213
1210
|
|
1214
1211
|
def repo_load_seasons():
|
1215
1212
|
#List of available seasons to scrape
|
1216
1213
|
|
1217
|
-
return
|
1214
|
+
return SEASONS
|
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.4
|
2
2
|
Name: wsba_hockey
|
3
|
-
Version: 1.1.
|
3
|
+
Version: 1.1.6
|
4
4
|
Summary: WeakSide Breakout's complete Python package of access to hockey data, primarily including the scraping of National Hockey League schedule, play-by-play, and shifts information.
|
5
5
|
Author-email: Owen Singh <owenbksingh@gmail.com>
|
6
6
|
Project-URL: Homepage, https://github.com/owensingh38/wsba_hockey/
|
@@ -1,7 +1,7 @@
|
|
1
1
|
wsba_hockey/__init__.py,sha256=yfr8z5PA503iaIQv30ngancwT_WnsuK-tZETKlHcI0M,377
|
2
2
|
wsba_hockey/data_pipelines.py,sha256=SITapG3nbea6-_EsXujMW2JBQxtRaQ33XMcE6ohn2Ko,10853
|
3
3
|
wsba_hockey/workspace.py,sha256=MwuyqyLW0dHNa06WEm60RkvbFoCn8LBXhnki66V-ttY,954
|
4
|
-
wsba_hockey/wsba_main.py,sha256=
|
4
|
+
wsba_hockey/wsba_main.py,sha256=Ucies8d27gWtzf8xprnu7hEcqGGHvOza8HCE0O80X-s,54031
|
5
5
|
wsba_hockey/api/api/index.py,sha256=tABWg5cYCY-fPaNJ6W_bMJKEYrjn93YGy84VlkHzIXA,6853
|
6
6
|
wsba_hockey/evidence/weakside-breakout/node_modules/duckdb/vendor.py,sha256=lmu0TB0rIYkAuV9-csFJgW-1hJojso_-EZpEoorUUKM,4949
|
7
7
|
wsba_hockey/evidence/weakside-breakout/node_modules/flatted/python/flatted.py,sha256=ke8FuEflns-WlphCcQ9CC0qJqWqX3zEEuak74o6rgE8,3879
|
@@ -132,7 +132,7 @@ wsba_hockey/evidence/weakside-breakout/wsba_nhl_apps/wsba_nhl_apps/team_heatmaps
|
|
132
132
|
wsba_hockey/evidence/weakside-breakout/wsba_nhl_apps/wsba_nhl_apps/team_heatmaps/rink_plot.py,sha256=RB_csrnTxlFR4OyFEhZXbHDSR3dP-KgME0xGBR2JE-4,11994
|
133
133
|
wsba_hockey/flask/app.py,sha256=J51iA65h9xyJfLgdH0h2sVSbfIR7xgGd2Oy8bJsmpAk,1873
|
134
134
|
wsba_hockey/tools/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
135
|
-
wsba_hockey/tools/agg.py,sha256=
|
135
|
+
wsba_hockey/tools/agg.py,sha256=OkIYd-ApvGVYe2JJLOI21jnDIN5LH8nkeH7eo0reWFI,23364
|
136
136
|
wsba_hockey/tools/plotting.py,sha256=81hBaM7tcwUNB4-tovPn7QreOUz6B2NuI_SR4-djVSk,6029
|
137
137
|
wsba_hockey/tools/scraping.py,sha256=h6C016U0qmNQpHWMh7Xvn3ud57zKzRbRQ06Odl-rC_I,52573
|
138
138
|
wsba_hockey/tools/xg_model.py,sha256=nOr_2RBijLgPmJ0TTs4wbSsORYmRqWCKRjLKDm7sAhI,18342
|
@@ -141,8 +141,8 @@ wsba_hockey/tools/utils/__init__.py,sha256=vccXhOtzARoR99fmEWU1OEI3qCIdQ9Z42AlRA
|
|
141
141
|
wsba_hockey/tools/utils/config.py,sha256=D3Uk05-YTyrhfReMTTLfNI3HN_rON2uo_CDE9oER3Lg,351
|
142
142
|
wsba_hockey/tools/utils/save_pages.py,sha256=CsyL_0n-b-4pJoUauwU3HpnCO6n69-RlBMJQBd_qGDc,4979
|
143
143
|
wsba_hockey/tools/utils/shared.py,sha256=dH_JwZfia5fib8rksy5sW-mBp0pluBPvw37Vdr8Kap0,14211
|
144
|
-
wsba_hockey-1.1.
|
145
|
-
wsba_hockey-1.1.
|
146
|
-
wsba_hockey-1.1.
|
147
|
-
wsba_hockey-1.1.
|
148
|
-
wsba_hockey-1.1.
|
144
|
+
wsba_hockey-1.1.6.dist-info/licenses/LICENSE,sha256=Nr_Um1Pd5FQJTWWgm7maZArdtYMbDhzXYSwyJIZDGik,1114
|
145
|
+
wsba_hockey-1.1.6.dist-info/METADATA,sha256=2CLs8qTA1iS8P7ToF4My86KkMRrt5zYoX9ynbQTS4zk,3566
|
146
|
+
wsba_hockey-1.1.6.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
|
147
|
+
wsba_hockey-1.1.6.dist-info/top_level.txt,sha256=acU7s3x-RZC1zGiqCOmO0g267iqCg34lzIfdmYxxGmQ,12
|
148
|
+
wsba_hockey-1.1.6.dist-info/RECORD,,
|
File without changes
|
File without changes
|
File without changes
|