wsba-hockey 1.1.1__py3-none-any.whl → 1.1.3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- wsba_hockey/api/api/index.py +1 -1
- wsba_hockey/tools/plotting.py +1 -1
- wsba_hockey/tools/scraping.py +1 -1
- wsba_hockey/tools/xg_model.py +3 -3
- {wsba_hockey-1.1.1.dist-info → wsba_hockey-1.1.3.dist-info}/METADATA +1 -1
- {wsba_hockey-1.1.1.dist-info → wsba_hockey-1.1.3.dist-info}/RECORD +9 -21
- wsba_hockey/api/api/main.py +0 -4
- wsba_hockey/api/api/tools/__init__.py +0 -0
- wsba_hockey/api/api/tools/agg.py +0 -374
- wsba_hockey/api/api/tools/archive/old_scraping.py +0 -1104
- wsba_hockey/api/api/tools/plotting.py +0 -144
- wsba_hockey/api/api/tools/scraping.py +0 -1000
- wsba_hockey/api/api/tools/utils/__init__.py +0 -1
- wsba_hockey/api/api/tools/utils/config.py +0 -14
- wsba_hockey/api/api/tools/utils/save_pages.py +0 -133
- wsba_hockey/api/api/tools/utils/shared.py +0 -450
- wsba_hockey/api/api/tools/xg_model.py +0 -455
- wsba_hockey/api/api/wsba_main.py +0 -1213
- {wsba_hockey-1.1.1.dist-info → wsba_hockey-1.1.3.dist-info}/WHEEL +0 -0
- {wsba_hockey-1.1.1.dist-info → wsba_hockey-1.1.3.dist-info}/licenses/LICENSE +0 -0
- {wsba_hockey-1.1.1.dist-info → wsba_hockey-1.1.3.dist-info}/top_level.txt +0 -0
wsba_hockey/api/api/wsba_main.py
DELETED
@@ -1,1213 +0,0 @@
import random
import os
import requests as rs
import pandas as pd
import time
from datetime import datetime, timedelta, date
from tools.scraping import *
from tools.xg_model import *
from tools.agg import *
from tools.plotting import *

### WSBA HOCKEY ###
## Provided below are all integral functions in the WSBA Hockey Python package. ##

## GLOBAL VARIABLES ##
seasons = [
    '20072008',
    '20082009',
    '20092010',
    '20102011',
    '20112012',
    '20122013',
    '20132014',
    '20142015',
    '20152016',
    '20162017',
    '20172018',
    '20182019',
    '20192020',
    '20202021',
    '20212022',
    '20222023',
    '20232024',
    '20242025'
]

convert_seasons = {'2007': '20072008',
                   '2008': '20082009',
                   '2009': '20092010',
                   '2010': '20102011',
                   '2011': '20112012',
                   '2012': '20122013',
                   '2013': '20132014',
                   '2014': '20142015',
                   '2015': '20152016',
                   '2016': '20162017',
                   '2017': '20172018',
                   '2018': '20182019',
                   '2019': '20192020',
                   '2020': '20202021',
                   '2021': '20212022',
                   '2022': '20222023',
                   '2023': '20232024',
                   '2024': '20242025'}

convert_team_abbr = {'L.A': 'LAK',
                     'N.J': 'NJD',
                     'S.J': 'SJS',
                     'T.B': 'TBL',
                     'PHX': 'ARI'}

per_sixty = ['Fi','xGi','Gi','A1','A2','P1','P','OZF','NZF','DZF','FF','FA','xGF','xGA','GF','GA','CF','CA','HF','HA','Give','Take','Penl','Penl2','Penl5','Draw','Block']

#Some games in the API are specifically known to cause errors in scraping.
#This list is updated as frequently as necessary.
known_probs = {
    '2007020011': 'Missing shifts data for game between Chicago and Minnesota.',
    '2007021178': 'Game between the Bruins and Sabres is missing data after the second period, for some reason.',
    '2008020259': 'HTML data is completely missing for this game.',
    '2008020409': 'HTML data is completely missing for this game.',
    '2008021077': 'HTML data is completely missing for this game.',
    '2009020081': 'HTML pbp for this game between Pittsburgh and Carolina is missing all but the period start and first faceoff events, for some reason.',
    '2009020658': 'Missing shifts data for game between New York Islanders and Dallas.',
    '2009020885': 'Missing shifts data for game between Sharks and Blue Jackets.',
    '2010020124': 'Game between Capitals and Hurricanes is sporadically missing player on-ice data.',
    '2012020018': 'HTML events contain mislabeled events.',
    '2013020971': 'On March 10th, 2014, Stars forward Rich Peverley suffered a cardiac episode mid-game and, as a result, the remainder of the game was postponed. \nThe game resumed on April 9th, and the only goal scorer in the game, Blue Jackets forward Nathan Horton, did not appear in the resumed game due to injury. Interestingly, Horton would never play in the NHL again.',
    '2018021133': 'Game between Lightning and Capitals has incorrectly labeled event teams (i.e. WSH TAKEAWAY - #71 CIRELLI (Cirelli is a Tampa Bay skater in this game)).',
    '2019020876': 'Due to the frightening collapse of Blues defenseman Jay Bouwmeester, a game on February 11th, 2020 between the Ducks and Blues was postponed. \nWhen the game resumed, Ducks defenseman Hampus Lindholm, who assisted on a goal in the initial game, did not play in the resumed match.'
}

shot_types = ['wrist','deflected','tip-in','slap','backhand','snap','wrap-around','poke','bat','cradle','between-legs']

new = 2024

standings_end = {
    '20072008': '04-06',
    '20082009': '04-12',
    '20092010': '04-11',
    '20102011': '04-10',
    '20112012': '04-07',
    '20122013': '04-28',
    '20132014': '04-13',
    '20142015': '04-11',
    '20152016': '04-10',
    '20162017': '04-09',
    '20172018': '04-08',
    '20182019': '04-06',
    '20192020': '03-11',
    '20202021': '05-19',
    '20212022': '04-01',
    '20222023': '04-14',
    '20232024': '04-18',
    '20242025': '04-17'
}

events = ['faceoff','hit','giveaway','takeaway','blocked-shot','missed-shot','shot-on-goal','goal','penalty']

dir = os.path.dirname(os.path.realpath(__file__))
schedule_path = os.path.join(dir, 'tools', 'schedule', 'schedule.csv')
info_path = os.path.join(dir, 'tools', 'teaminfo', 'nhl_teaminfo.csv')
default_roster = os.path.join(dir, 'tools', 'rosters', 'nhl_rosters.csv')

## SCRAPE FUNCTIONS ##
def nhl_scrape_game(game_ids, split_shifts=False, remove=['period-start','period-end','challenge','stoppage','shootout-complete','game-end'], verbose=False, sources=False, errors=False):
    #Given a set of game_ids (NHL API), return complete play-by-play information as requested
    # param 'game_ids' - NHL game ids (or list formatted as ['random', num_of_games, start_year, end_year])
    # param 'split_shifts' - boolean which splits pbp and shift events if true
    # param 'remove' - list of events to remove from final dataframe
    # param 'verbose' - boolean which adds additional event info if true
    # param 'sources' - boolean scraping the html and json sources to a master directory if true
    # param 'errors' - boolean returning game ids which did not scrape if true

    pbps = []
    if game_ids[0] == 'random':
        #Randomize selection of game_ids
        #Some ids returned may be invalid (for example, 2020021300)
        num = game_ids[1]
        try:
            start = game_ids[2]
        except IndexError:
            start = 2007
        try:
            end = game_ids[3]
        except IndexError:
            end = (date.today().year) - 1

        game_ids = []
        i = 0
        print("Finding valid, random game ids...")
        while i != num:
            print(f"\rGame IDs found in range {start}-{end}: {i}/{num}", end="")
            rand_year = random.randint(start, end)
            rand_season_type = random.randint(2, 3)
            rand_game = random.randint(1, 1312)

            #Ensure id validity (and that the number of scraped games is equal to the specified value)
            rand_id = f'{rand_year}{rand_season_type:02d}{rand_game:04d}'
            try:
                rs.get(f"https://api-web.nhle.com/v1/gamecenter/{rand_id}/play-by-play").json()
                i += 1
                game_ids.append(rand_id)
            except Exception:
                continue

        print(f"\rGame IDs found in range {start}-{end}: {i}/{num}")

    #Scrape each game, tracking errors
    error_ids = []
    prog = 0
    for game_id in game_ids:
        print("Scraping data from game " + str(game_id) + "...", end="")
        start = time.perf_counter()

        try:
            #Retrieve data
            info = get_game_info(game_id)
            data = combine_data(info, sources)

            #Append data to list
            pbps.append(data)

            end = time.perf_counter()
            secs = end - start
            prog += 1

            #Export if sources is true
            if sources:
                dirs = f"sources/{info['season']}/"

                if not os.path.exists(dirs):
                    os.makedirs(dirs)

                data.to_csv(f"{dirs}{info['game_id']}.csv", index=False)

            print(f" finished in {secs:.2f} seconds. {prog}/{len(game_ids)} ({(prog/len(game_ids))*100:.2f}%)")
        except Exception:
            #Games such as the all-star game and pre-season games will incur this error
            #Other games have known problems
            if game_id in known_probs.keys():
                print(f"\nGame {game_id} has a known problem: {known_probs[game_id]}")
            else:
                print(f"\nUnable to scrape game {game_id}. Ensure the ID is properly inputted and formatted.")

            #Track error
            error_ids.append(game_id)

    #Add all pbps together
    if len(pbps) == 0:
        print("\rNo data returned.")
        return pd.DataFrame()
    df = pd.concat(pbps)

    #If verbose is true, features required to calculate xG are added to the dataframe
    if verbose:
        df = prep_xG_data(df)

    #Print final message
    if len(error_ids) > 0:
        print(f'\rScrape of provided games finished.\nThe following games failed to scrape: {error_ids}')
    else:
        print('\rScrape of provided games finished.')

    #Split pbp and shift events if necessary
    #Return: complete play-by-play with data removed or split as necessary

    if split_shifts == True:
        #Copy rather than append, so the default 'remove' list is not mutated across calls
        remove = remove + ['change']

        #Return: dict with pbp and shifts separated
        pbp_dict = {"pbp": df.loc[~df['event_type'].isin(remove)],
                    "shifts": df.loc[df['event_type'] == 'change']
                    }

        if errors:
            pbp_dict.update({'errors': error_ids})

        return pbp_dict
    else:
        #Return: all events that are not set for removal by the provided list
        pbp = df.loc[~df['event_type'].isin(remove)]

        if errors:
            pbp_dict = {'pbp': pbp,
                        'errors': error_ids}

            return pbp_dict
        else:
            return pbp
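
# --- Illustrative usage sketch (editor's addition; not part of the deleted
# module). Game IDs follow the format built in the random-ID branch above:
# season year + game type (02 regular season, 03 playoffs) + 4-digit game
# number. The IDs below are hypothetical:
#
#   data = nhl_scrape_game(['2023020001', '2023020002'], split_shifts=True, errors=True)
#   pbp, shifts, failed = data['pbp'], data['shifts'], data['errors']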

def nhl_scrape_schedule(season, start="09-01", end="08-01"):
    #Given a season, return schedule data
    # param 'season' - NHL season to scrape
    # param 'start' - Start date in season
    # param 'end' - End date in season

    api = "https://api-web.nhle.com/v1/schedule/"

    #Determine how to approach scraping; if the start month falls after the new year, the year must be adjusted
    new_year = ["01","02","03","04","05","06"]
    if start[:2] in new_year:
        start = str(int(season[:4])+1)+"-"+start
        end = str(season[:-4])+"-"+end
    else:
        start = str(season[:4])+"-"+start
        end = str(season[:-4])+"-"+end

    form = '%Y-%m-%d'

    #Create datetime values from dates
    start = datetime.strptime(start, form)
    end = datetime.strptime(end, form)

    game = []

    day = (end-start).days+1
    if day < 0:
        #Handles cases where the constructed end date falls before the start date
        day = 365 + day
    for i in range(day):
        #For each day, call the NHL api and retrieve info on all games for the selected date
        inc = start+timedelta(days=i)
        print("Scraping games on " + str(inc)[:10] + "...")

        get = rs.get(api+str(inc)[:10]).json()
        gameWeek = pd.json_normalize(list(pd.json_normalize(get['gameWeek'])['games'])[0])

        #Return nothing if there's nothing
        if gameWeek.empty:
            game.append(gameWeek)
        else:
            gameWeek['date'] = get['gameWeek'][0]['date']

            gameWeek['season_type'] = gameWeek['gameType']
            gameWeek['away_team_abbr'] = gameWeek['awayTeam.abbrev']
            gameWeek['home_team_abbr'] = gameWeek['homeTeam.abbrev']
            gameWeek['game_title'] = gameWeek['away_team_abbr'] + " @ " + gameWeek['home_team_abbr'] + " - " + gameWeek['date']
            gameWeek['estStartTime'] = pd.to_datetime(gameWeek['startTimeUTC']).dt.tz_convert('US/Eastern').dt.strftime("%I:%M %p")

            front_col = ['id','season','date','season_type','game_title','away_team_abbr','home_team_abbr','estStartTime']
            gameWeek = gameWeek[front_col+[col for col in gameWeek.columns.to_list() if col not in front_col]]

            game.append(gameWeek)

    #Concatenate all games
    df = pd.concat(game)

    #Return: specified schedule data
    return df
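
# --- Illustrative usage sketch (editor's addition; not part of the deleted
# module). Pull one month of a season's schedule; columns follow the code above:
#
#   sched = nhl_scrape_schedule('20232024', start='10-01', end='11-01')
#   print(sched[['id', 'game_title', 'estStartTime']].head())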

def nhl_scrape_season(season, split_shifts=False, season_types=[2,3], remove=['period-start','period-end','game-end','challenge','stoppage'], start="09-01", end="08-01", local=False, local_path=schedule_path, verbose=False, sources=False, errors=False):
    #Given a season, scrape all play-by-play occurring within the season
    # param 'season' - NHL season to scrape
    # param 'split_shifts' - boolean which splits pbp and shift events if true
    # param 'season_types' - list of season types (preseason, regular season, or playoffs) to include
    # param 'remove' - list of events to remove from final dataframe
    # param 'start' - Start date in season
    # param 'end' - End date in season
    # param 'local' - boolean indicating whether to use a local file to scrape game_ids
    # param 'local_path' - path of local file
    # param 'verbose' - boolean which adds additional event info if true
    # param 'sources' - boolean scraping the html and json sources to a master directory if true
    # param 'errors' - boolean returning game ids which did not scrape if true

    #Determine whether to use schedule data in the repository or to scrape
    if local:
        load = pd.read_csv(local_path)
        load['date'] = pd.to_datetime(load['date'])

        start = f'{(season[0:4] if int(start[0:2])>=9 else season[4:8])}-{int(start[0:2])}-{int(start[3:5])}'
        end = f'{(season[0:4] if int(end[0:2])>=9 else season[4:8])}-{int(end[0:2])}-{int(end[3:5])}'

        load = load.loc[(load['season'].astype(str)==season)&
                        (load['season_type'].isin(season_types))&
                        (load['date']>=start)&(load['date']<=end)]

        game_ids = list(load['id'].astype(str))
    else:
        load = nhl_scrape_schedule(season, start, end)
        load = load.loc[(load['season'].astype(str)==season)&(load['season_type'].isin(season_types))]
        game_ids = list(load['id'].astype(str))

    #If no games are found, terminate the process
    if not game_ids:
        print('No games found for dates in season...')
        return ""

    print(f"Scraping games from {season[0:4]}-{season[4:8]} season...")
    start = time.perf_counter()

    #Perform scrape
    if split_shifts:
        data = nhl_scrape_game(game_ids, split_shifts=True, remove=remove, verbose=verbose, sources=sources, errors=errors)
    else:
        data = nhl_scrape_game(game_ids, remove=remove, verbose=verbose, sources=sources, errors=errors)

    end = time.perf_counter()
    secs = end - start

    print(f'Finished season scrape in {(secs/60)/60:.2f} hours.')
    #Return: complete pbp and shifts data for the specified season, as well as game_ids which failed to return data
    return data
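
# --- Illustrative usage sketch (editor's addition; not part of the deleted
# module). Scrape a full regular season using the bundled schedule file:
#
#   data = nhl_scrape_season('20232024', season_types=[2], local=True, errors=True)
#   pbp, failed = data['pbp'], data['errors']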

def nhl_scrape_seasons_info(seasons=[]):
    #Returns info related to NHL seasons (by default, all seasons are included)
    # param 'seasons' - list of seasons to include

    print("Scraping info for seasons: " + str(seasons))
    api = "https://api.nhle.com/stats/rest/en/season"
    info = "https://api-web.nhle.com/v1/standings-season"
    data = rs.get(api).json()['data']
    data_2 = rs.get(info).json()['seasons']

    df = pd.json_normalize(data)
    df_2 = pd.json_normalize(data_2)

    df = pd.merge(df, df_2, how='outer', on=['id'])

    if len(seasons) > 0:
        return df.loc[df['id'].astype(str).isin(seasons)].sort_values(by=['id'])
    else:
        return df.sort_values(by=['id'])

def nhl_scrape_standings(arg="now", season_type=2):
    #Returns standings
    # param 'arg' - by default, this is "now", returning active NHL standings. May also be a specific date formatted as YYYY-MM-DD, a season (scrapes the last standings date for the season), or a year (for playoffs).
    # param 'season_type' - by default, this scrapes the regular season standings. If set to 3, it returns the playoff bracket for the specified season.

    #The arg param is ignored when set to "now" if the season_type param is 3
    if season_type == 3:
        if arg == "now":
            arg = new

        print(f"Scraping playoff bracket for date: {arg}")
        api = f"https://api-web.nhle.com/v1/playoff-bracket/{arg}"

        data = rs.get(api).json()['series']

        return pd.json_normalize(data)

    else:
        if arg == "now":
            print("Scraping standings as of now...")
        elif arg in seasons:
            print(f'Scraping standings for season: {arg}')
        else:
            print(f"Scraping standings for date: {arg}")

        #A season argument maps to its final standings date; "now" or a specific date passes through directly
        if arg in seasons:
            api = f"https://api-web.nhle.com/v1/standings/{arg[4:8]}-{standings_end[arg]}"
        else:
            api = f"https://api-web.nhle.com/v1/standings/{arg}"
        data = rs.get(api).json()['standings']

        return pd.json_normalize(data)
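
# --- Illustrative usage sketch (editor's addition; not part of the deleted
# module):
#
#   live = nhl_scrape_standings()                  # current standings
#   final = nhl_scrape_standings('20222023')       # standings at that season's end date
#   bracket = nhl_scrape_standings(season_type=3)  # latest playoff bracket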

def nhl_scrape_roster(season):
    #Given an NHL season, return rosters for all participating teams
    # param 'season' - NHL season to scrape
    print("Scraping rosters for the " + season + " season...")
    teaminfo = pd.read_csv(info_path)

    rosts = []
    for team in list(teaminfo['Team']):
        try:
            print("Scraping " + team + " roster...")
            api = "https://api-web.nhle.com/v1/roster/"+team+"/"+season

            data = rs.get(api).json()
            forwards = pd.json_normalize(data['forwards'])
            forwards['headingPosition'] = "F"
            dmen = pd.json_normalize(data['defensemen'])
            dmen['headingPosition'] = "D"
            goalies = pd.json_normalize(data['goalies'])
            goalies['headingPosition'] = "G"

            roster = pd.concat([forwards,dmen,goalies]).reset_index(drop=True)
            roster['fullName'] = (roster['firstName.default']+" "+roster['lastName.default']).str.upper()
            roster['season'] = str(season)
            roster['team_abbr'] = team

            rosts.append(roster)
        except Exception:
            print("No roster found for " + team + "...")

    return pd.concat(rosts)

def nhl_scrape_prospects(team):
    #Given a team abbreviation, retrieve current team prospects

    api = f'https://api-web.nhle.com/v1/prospects/{team}'

    data = rs.get(api).json()

    #Iterate through positions
    players = [pd.json_normalize(data[pos]) for pos in ['forwards','defensemen','goalies']]

    prospects = pd.concat(players)
    #Add name column
    prospects['fullName'] = (prospects['firstName.default']+" "+prospects['lastName.default']).str.upper()

    #Return: team prospects
    return prospects

def nhl_scrape_team_info(country=False):
    #Given the option to return franchises or countries, return team information

    print('Scraping team information...')
    api = f"https://api.nhle.com/stats/rest/en/{'country' if country else 'team'}"

    data = pd.json_normalize(rs.get(api).json()['data'])

    #Add logos if necessary
    if not country:
        data['logo_light'] = 'https://assets.nhle.com/logos/nhl/svg/'+data['triCode']+'_light.svg'
        data['logo_dark'] = 'https://assets.nhle.com/logos/nhl/svg/'+data['triCode']+'_dark.svg'

    return data.sort_values(by=(['country3Code','countryCode','iocCode','countryName'] if country else ['fullName','triCode','id']))
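
# --- Illustrative usage sketch (editor's addition; not part of the deleted
# module):
#
#   teams = nhl_scrape_team_info()                 # franchise table with logo URLs
#   countries = nhl_scrape_team_info(country=True)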

def nhl_scrape_player_data(player_ids):
    #Given player ids, return player information
    infos = []
    for player_id in player_ids:
        player_id = int(player_id)
        api = f'https://api-web.nhle.com/v1/player/{player_id}/landing'

        data = pd.json_normalize(rs.get(api).json())
        #Add name column
        data['fullName'] = (data['firstName.default'] + " " + data['lastName.default']).str.upper()

        #Append
        infos.append(data)

    if infos:
        df = pd.concat(infos)

        #Return: player data
        return df
    else:
        return pd.DataFrame()

def nhl_scrape_draft_rankings(arg='now', category=''):
    #Given url argument for timeframe and prospect category, return draft rankings
    #Category 1 is North American Skaters
    #Category 2 is International Skaters
    #Category 3 is North American Goalies
    #Category 4 is International Goalies

    #Player category only applies when requesting a specific season
    api = f"https://api-web.nhle.com/v1/draft/rankings/{arg}/{category}" if category != "" else f"https://api-web.nhle.com/v1/draft/rankings/{arg}"
    data = pd.json_normalize(rs.get(api).json()['rankings'])

    #Add player name column
    data['fullName'] = (data['firstName']+" "+data['lastName']).str.upper()

    #Return: prospect rankings
    return data

def nhl_apply_xG(pbp):
    #Given play-by-play data, return the data with xG-related columns

    # param 'pbp' - play-by-play data

    print(f"Applying WSBA xG model to play-by-play with seasons: {pbp['season'].drop_duplicates().to_list()}")

    #Apply xG model
    pbp = wsba_xG(pbp)

    return pbp

def nhl_shooting_impacts(agg, type):
    #Given a stats table generated by the nhl_calculate_stats function, return the table with shot impacts
    #Only 5v5 is supported as of now

    # param 'agg' - stats table
    # param 'type' - type of stats to calculate ('skater', 'goalie', or 'team')

    #COMPOSITE IMPACT EVALUATIONS:

    #SR = Shot Rate
    #SQ = Shot Quality
    #FN = Finishing

    #I = Impact

    #INDV = Individual
    #OOFF = On-Ice Offense
    #ODEF = On-Ice Defense

    #Grouping-Metric Code: XXXX-YYI

    #Goal Composition Formula
    #The aggregation of goals is composed of three factors: shot rate, shot quality, and finishing
    #These are represented by their own metrics in which Goals = (Fenwick*(League Average Fenwick SH%)) + ((xGoals/Fenwick - League Average Fenwick SH%)*Fenwick) + (Goals - xGoals)
    def goal_comp(fenwick, xg_fen, xg, g, fsh):
        rate = fenwick * fsh
        qual = (xg_fen - fsh) * fenwick
        fini = g - xg

        return rate + qual + fini

    if type == 'goalie':
        pos = agg
        for group in [('OOFF','F'), ('ODEF','A')]:
            #Have to set these columns for compatibility with df.apply
            pos['fsh'] = pos[f'Fsh{group[1]}%']
            pos['fenwick'] = pos[f'F{group[1]}/60']
            pos['xg'] = pos[f'xG{group[1]}/60']
            pos['g'] = pos[f'G{group[1]}/60']
            pos['xg_fen'] = pos[f'xG{group[1]}/F{group[1]}']
            pos['finishing'] = pos[f'G{group[1]}/xG{group[1]}']

            #Find averages for the frame
            avg_fen = pos['fenwick'].mean()
            avg_xg = pos['xg'].mean()
            avg_g = pos['g'].mean()
            avg_fsh = avg_g/avg_fen
            avg_xg_fen = avg_xg/avg_fen

            #Calculate composite percentiles
            pos[f'{group[0]}-SR'] = pos['fenwick'].rank(pct=True)
            pos[f'{group[0]}-SQ'] = pos['xg_fen'].rank(pct=True)
            pos[f'{group[0]}-FN'] = pos['finishing'].rank(pct=True)

            #Calculate shot rate, shot quality, and finishing impacts
            pos[f'{group[0]}-SRI'] = pos['g'] - pos.apply(lambda x: goal_comp(avg_fen, x.xg_fen, x.xg, x.g, avg_fsh), axis=1)
            pos[f'{group[0]}-SQI'] = pos['g'] - pos.apply(lambda x: goal_comp(x.fenwick, avg_xg_fen, x.xg, x.g, avg_fsh), axis=1)
            pos[f'{group[0]}-FNI'] = pos['g'] - pos.apply(lambda x: goal_comp(x.fenwick, x.xg_fen, avg_xg, avg_g, avg_fsh), axis=1)

            #Convert impacts to totals
            pos[f'{group[0]}-SRI-T'] = (pos[f'{group[0]}-SRI']/60)*pos['TOI']
            pos[f'{group[0]}-SQI-T'] = (pos[f'{group[0]}-SQI']/60)*pos['TOI']
            pos[f'{group[0]}-FNI-T'] = (pos[f'{group[0]}-FNI']/60)*pos['TOI']

        #Rank per-60 stats
        for stat in ['FF','FA','xGF','xGA','GF','GA','CF','CA','GSAx']:
            pos[f'{stat}/60-P'] = pos[f'{stat}/60'].rank(pct=True)

        #Flip percentiles for against stats
        for stat in ['FA','xGA','GA','CA']:
            pos[f'{stat}/60-P'] = 1-pos[f'{stat}/60-P']

        #Add extra metrics
        pos['RushF/60'] = (pos['RushF']/pos['TOI'])*60
        pos['RushA/60'] = (pos['RushA']/pos['TOI'])*60
        pos['RushesFF'] = pos['RushF/60'].rank(pct=True)
        pos['RushesFA'] = 1 - pos['RushA/60'].rank(pct=True)
        pos['RushFxG/60'] = (pos['RushFxG']/pos['TOI'])*60
        pos['RushAxG/60'] = (pos['RushAxG']/pos['TOI'])*60
        pos['RushesxGF'] = pos['RushFxG/60'].rank(pct=True)
        pos['RushesxGA'] = 1 - pos['RushAxG/60'].rank(pct=True)
        pos['RushFG/60'] = (pos['RushFG']/pos['TOI'])*60
        pos['RushAG/60'] = (pos['RushAG']/pos['TOI'])*60
        pos['RushesGF'] = pos['RushFG/60'].rank(pct=True)
        pos['RushesGA'] = 1 - pos['RushAG/60'].rank(pct=True)

        #Flip against-metric percentiles
        pos['ODEF-SR'] = 1-pos['ODEF-SR']
        pos['ODEF-SQ'] = 1-pos['ODEF-SQ']
        pos['ODEF-FN'] = 1-pos['ODEF-FN']

        #Extraneous values
        pos['EGF'] = pos['OOFF-SRI']+pos['OOFF-SQI']+pos['OOFF-FNI']
        pos['ExGF'] = pos['OOFF-SRI']+pos['OOFF-SQI']
        pos['EGA'] = pos['ODEF-SRI']+pos['ODEF-SQI']+pos['ODEF-FNI']
        pos['ExGA'] = pos['ODEF-SRI']+pos['ODEF-SQI']

        #...and their percentiles (flipped for against values)
        pos['EGF-P'] = pos['EGF'].rank(pct=True)
        pos['ExGF-P'] = pos['ExGF'].rank(pct=True)
        pos['EGA-P'] = 1-pos['EGA'].rank(pct=True)
        pos['ExGA-P'] = 1-pos['ExGA'].rank(pct=True)

        #...and then their totals
        pos['EGF-T'] = (pos['EGF']/60)*pos['TOI']
        pos['ExGF-T'] = (pos['ExGF']/60)*pos['TOI']
        pos['EGA-T'] = (pos['EGA']/60)*pos['TOI']
        pos['ExGA-T'] = (pos['ExGA']/60)*pos['TOI']

        #Goal composites...
        pos['Team-Adjusted-EGI'] = pos['ODEF-FNI']-pos['ExGA']
        pos['GISAx'] = pos['ExGA']-pos['EGA']
        pos['NetGI'] = pos['EGF'] - pos['EGA']
        pos['NetxGI'] = pos['ExGF'] - pos['ExGA']

        #...and their percentiles
        pos['Team-Adjusted-EGI-P'] = pos['Team-Adjusted-EGI'].rank(pct=True)
        pos['GISAx-P'] = pos['GISAx'].rank(pct=True)
        pos['NetGI-P'] = pos['NetGI'].rank(pct=True)
        pos['NetxGI-P'] = pos['NetxGI'].rank(pct=True)

        #...and then their totals
        pos['Team-Adjusted-EGI-T'] = (pos['Team-Adjusted-EGI']/60)*pos['TOI']
        pos['GISAx-T'] = (pos['GISAx']/60)*pos['TOI']
        pos['NetGI-T'] = (pos['NetGI']/60)*pos['TOI']
        pos['NetxGI-T'] = (pos['NetxGI']/60)*pos['TOI']

        #Return: goalie stats with shooting impacts
        return pos.drop(columns=['fsh','fenwick','xg_fen','xg','g','finishing']).sort_values(['Goalie','Season','Team'])

    elif type == 'team':
        pos = agg
        for group in [('OOFF','F'), ('ODEF','A')]:
            #Have to set these columns for compatibility with df.apply
            pos['fsh'] = pos[f'Fsh{group[1]}%']
            pos['fenwick'] = pos[f'F{group[1]}/60']
            pos['xg'] = pos[f'xG{group[1]}/60']
            pos['g'] = pos[f'G{group[1]}/60']
            pos['xg_fen'] = pos[f'xG{group[1]}/F{group[1]}']
            pos['finishing'] = pos[f'G{group[1]}/xG{group[1]}']

            #Find averages for the frame
            avg_fen = pos['fenwick'].mean()
            avg_xg = pos['xg'].mean()
            avg_g = pos['g'].mean()
            avg_fsh = avg_g/avg_fen
            avg_xg_fen = avg_xg/avg_fen

            #Calculate composite percentiles
            pos[f'{group[0]}-SR'] = pos['fenwick'].rank(pct=True)
            pos[f'{group[0]}-SQ'] = pos['xg_fen'].rank(pct=True)
            pos[f'{group[0]}-FN'] = pos['finishing'].rank(pct=True)

            #Calculate shot rate, shot quality, and finishing impacts
            pos[f'{group[0]}-SRI'] = pos['g'] - pos.apply(lambda x: goal_comp(avg_fen, x.xg_fen, x.xg, x.g, avg_fsh), axis=1)
            pos[f'{group[0]}-SQI'] = pos['g'] - pos.apply(lambda x: goal_comp(x.fenwick, avg_xg_fen, x.xg, x.g, avg_fsh), axis=1)
            pos[f'{group[0]}-FNI'] = pos['g'] - pos.apply(lambda x: goal_comp(x.fenwick, x.xg_fen, avg_xg, avg_g, avg_fsh), axis=1)

            #Convert impacts to totals
            pos[f'{group[0]}-SRI-T'] = (pos[f'{group[0]}-SRI']/60)*pos['TOI']
            pos[f'{group[0]}-SQI-T'] = (pos[f'{group[0]}-SQI']/60)*pos['TOI']
            pos[f'{group[0]}-FNI-T'] = (pos[f'{group[0]}-FNI']/60)*pos['TOI']

        #Rank per-60 stats
        for stat in per_sixty[10:len(per_sixty)]:
            pos[f'{stat}/60-P'] = pos[f'{stat}/60'].rank(pct=True)

        #Flip percentiles for against stats
        for stat in ['FA','xGA','GA','CA','HA','Give','Penl','Penl2','Penl5']:
            pos[f'{stat}/60-P'] = 1-pos[f'{stat}/60-P']

        #Add extra metrics
        pos['RushF/60'] = (pos['RushF']/pos['TOI'])*60
        pos['RushA/60'] = (pos['RushA']/pos['TOI'])*60
        pos['RushesFF'] = pos['RushF/60'].rank(pct=True)
        pos['RushesFA'] = 1 - pos['RushA/60'].rank(pct=True)
        pos['RushFxG/60'] = (pos['RushFxG']/pos['TOI'])*60
        pos['RushAxG/60'] = (pos['RushAxG']/pos['TOI'])*60
        pos['RushesxGF'] = pos['RushFxG/60'].rank(pct=True)
        pos['RushesxGA'] = 1 - pos['RushAxG/60'].rank(pct=True)
        pos['RushFG/60'] = (pos['RushFG']/pos['TOI'])*60
        pos['RushAG/60'] = (pos['RushAG']/pos['TOI'])*60
        pos['RushesGF'] = pos['RushFG/60'].rank(pct=True)
        pos['RushesGA'] = 1 - pos['RushAG/60'].rank(pct=True)

        #Flip against-metric percentiles
        pos['ODEF-SR'] = 1-pos['ODEF-SR']
        pos['ODEF-SQ'] = 1-pos['ODEF-SQ']
        pos['ODEF-FN'] = 1-pos['ODEF-FN']

        pos['EGF'] = pos['OOFF-SRI']+pos['OOFF-SQI']+pos['OOFF-FNI']
        pos['ExGF'] = pos['OOFF-SRI']+pos['OOFF-SQI']
        pos['EGA'] = pos['ODEF-SRI']+pos['ODEF-SQI']+pos['ODEF-FNI']
        pos['ExGA'] = pos['ODEF-SRI']+pos['ODEF-SQI']

        #...and their percentiles (flipped for against values)
        pos['EGF-P'] = pos['EGF'].rank(pct=True)
        pos['ExGF-P'] = pos['ExGF'].rank(pct=True)
        pos['EGA-P'] = 1-pos['EGA'].rank(pct=True)
        pos['ExGA-P'] = 1-pos['ExGA'].rank(pct=True)

        #...and then their totals
        pos['EGF-T'] = (pos['EGF']/60)*pos['TOI']
        pos['ExGF-T'] = (pos['ExGF']/60)*pos['TOI']
        pos['EGA-T'] = (pos['EGA']/60)*pos['TOI']
        pos['ExGA-T'] = (pos['ExGA']/60)*pos['TOI']

        #Return: team stats with shooting impacts
        return pos.drop(columns=['fsh','fenwick','xg_fen','xg','g','finishing']).sort_values(['Season','Team'])

    else:
        #Remove skaters with less than 150 minutes of TOI, then split between forwards and dmen
        #These are added back in after the fact
        forwards = agg.loc[(agg['Position']!='D')&(agg['TOI']>=150)].copy()
        defensemen = agg.loc[(agg['Position']=='D')&(agg['TOI']>=150)].copy()
        non_players = agg.loc[agg['TOI']<150].copy()

        #Loop through both positions and all groupings (INDV, OOFF, and ODEF), generating impacts
        for pos in [forwards, defensemen]:
            for group in [('INDV','i'), ('OOFF','F'), ('ODEF','A')]:
                #Have to set these columns for compatibility with df.apply
                pos['fsh'] = pos[f'Fsh{group[1]}%']
                pos['fenwick'] = pos[f'F{group[1]}/60']
                pos['xg'] = pos[f'xG{group[1]}/60']
                pos['g'] = pos[f'G{group[1]}/60']
                pos['xg_fen'] = pos[f'xG{group[1]}/F{group[1]}']
                pos['finishing'] = pos[f'G{group[1]}/xG{group[1]}']

                #Find averages for the position in frame
                avg_fen = pos['fenwick'].mean()
                avg_xg = pos['xg'].mean()
                avg_g = pos['g'].mean()
                avg_fsh = avg_g/avg_fen
                avg_xg_fen = avg_xg/avg_fen

                #Calculate composite percentiles
                pos[f'{group[0]}-SR'] = pos['fenwick'].rank(pct=True)
                pos[f'{group[0]}-SQ'] = pos['xg_fen'].rank(pct=True)
                pos[f'{group[0]}-FN'] = pos['finishing'].rank(pct=True)

                #Calculate shot rate, shot quality, and finishing impacts
                pos[f'{group[0]}-SRI'] = pos['g'] - pos.apply(lambda x: goal_comp(avg_fen, x.xg_fen, x.xg, x.g, avg_fsh), axis=1)
                pos[f'{group[0]}-SQI'] = pos['g'] - pos.apply(lambda x: goal_comp(x.fenwick, avg_xg_fen, x.xg, x.g, avg_fsh), axis=1)
                pos[f'{group[0]}-FNI'] = pos['g'] - pos.apply(lambda x: goal_comp(x.fenwick, x.xg_fen, avg_xg, avg_g, avg_fsh), axis=1)

                #Convert impacts to totals
                pos[f'{group[0]}-SRI-T'] = (pos[f'{group[0]}-SRI']/60)*pos['TOI']
                pos[f'{group[0]}-SQI-T'] = (pos[f'{group[0]}-SQI']/60)*pos['TOI']
                pos[f'{group[0]}-FNI-T'] = (pos[f'{group[0]}-FNI']/60)*pos['TOI']

            #Calculate on-ice involvement percentiles
            pos['Fi/F'] = pos['FC%'].rank(pct=True)
            pos['xGi/F'] = pos['xGC%'].rank(pct=True)
            pos['Pi/F'] = pos['GI%'].rank(pct=True)
            pos['Gi/F'] = pos['GC%'].rank(pct=True)
            pos['RushFi/60'] = (pos['Rush']/pos['TOI'])*60
            pos['RushxGi/60'] = (pos['Rush xG']/pos['TOI'])*60
            pos['RushesxGi'] = pos['RushxGi/60'].rank(pct=True)
            pos['RushesFi'] = pos['RushFi/60'].rank(pct=True)

            #Rank per-60 stats
            for stat in per_sixty:
                pos[f'{stat}/60-P'] = pos[f'{stat}/60'].rank(pct=True)

            #Flip percentiles for against stats
            for stat in ['FA','xGA','GA','CA','HA','Give','Penl','Penl2','Penl5']:
                pos[f'{stat}/60-P'] = 1-pos[f'{stat}/60-P']

        #Add positions back together
        complete = pd.concat([forwards, defensemen])

        #Flip against-metric percentiles
        complete['ODEF-SR'] = 1-complete['ODEF-SR']
        complete['ODEF-SQ'] = 1-complete['ODEF-SQ']
        complete['ODEF-FN'] = 1-complete['ODEF-FN']

        #Extraneous values
        complete['EGi'] = complete['INDV-SRI']+complete['INDV-SQI']+complete['INDV-FNI']
        complete['ExGi'] = complete['INDV-SRI']+complete['INDV-SQI']
        complete['EGF'] = complete['OOFF-SRI']+complete['OOFF-SQI']+complete['OOFF-FNI']
        complete['ExGF'] = complete['OOFF-SRI']+complete['OOFF-SQI']
        complete['EGA'] = complete['ODEF-SRI']+complete['ODEF-SQI']+complete['ODEF-FNI']
        complete['ExGA'] = complete['ODEF-SRI']+complete['ODEF-SQI']

        #...and their percentiles (flipped for against values)
        complete['EGi-P'] = complete['EGi'].rank(pct=True)
        complete['ExGi-P'] = complete['ExGi'].rank(pct=True)
        complete['EGF-P'] = complete['EGF'].rank(pct=True)
        complete['ExGF-P'] = complete['ExGF'].rank(pct=True)
        complete['EGA-P'] = 1-complete['EGA'].rank(pct=True)
        complete['ExGA-P'] = 1-complete['ExGA'].rank(pct=True)

        #...and then their totals
        complete['EGi-T'] = (complete['EGi']/60)*complete['TOI']
        complete['ExGi-T'] = (complete['ExGi']/60)*complete['TOI']
        complete['EGF-T'] = (complete['EGF']/60)*complete['TOI']
        complete['ExGF-T'] = (complete['ExGF']/60)*complete['TOI']
        complete['EGA-T'] = (complete['EGA']/60)*complete['TOI']
        complete['ExGA-T'] = (complete['ExGA']/60)*complete['TOI']

        #Goal composites...
        complete['LiEG'] = complete['EGF'] - complete['EGi']
        complete['LiExG'] = complete['ExGF'] - complete['ExGi']
        complete['LiGIn'] = complete['LiEG']*complete['AC%']
        complete['LixGIn'] = complete['LiExG']*complete['AC%']
        complete['ALiGIn'] = complete['LiGIn']-complete['LixGIn']
        complete['CompGI'] = complete['EGi'] + complete['LiGIn']
        complete['LiRelGI'] = complete['CompGI'] - (complete['EGF']-complete['CompGI'])
        complete['NetGI'] = complete['EGF'] - complete['EGA']
        complete['NetxGI'] = complete['ExGF'] - complete['ExGA']

        #...and their percentiles
        complete['LiEG-P'] = complete['LiEG'].rank(pct=True)
        complete['LiExG-P'] = complete['LiExG'].rank(pct=True)
        complete['LiGIn-P'] = complete['LiGIn'].rank(pct=True)
        complete['LixGIn-P'] = complete['LixGIn'].rank(pct=True)
        complete['ALiGIn-P'] = complete['ALiGIn'].rank(pct=True)
        complete['CompGI-P'] = complete['CompGI'].rank(pct=True)
        complete['LiRelGI-P'] = complete['LiRelGI'].rank(pct=True)
        complete['NetGI-P'] = complete['NetGI'].rank(pct=True)
        complete['NetxGI-P'] = complete['NetxGI'].rank(pct=True)

        #...and then their totals
        complete['LiEG-T'] = (complete['LiEG']/60)*complete['TOI']
        complete['LiExG-T'] = (complete['LiExG']/60)*complete['TOI']
        complete['LiGIn-T'] = (complete['LiGIn']/60)*complete['TOI']
        complete['LixGIn-T'] = (complete['LixGIn']/60)*complete['TOI']
        complete['ALiGIn-T'] = (complete['ALiGIn']/60)*complete['TOI']
        complete['CompGI-T'] = (complete['CompGI']/60)*complete['TOI']
        complete['LiRelGI-T'] = (complete['LiRelGI']/60)*complete['TOI']
        complete['NetGI-T'] = (complete['NetGI']/60)*complete['TOI']
        complete['NetxGI-T'] = (complete['NetxGI']/60)*complete['TOI']

        #Add back skaters with less than 150 minutes TOI
        df = pd.concat([complete, non_players]).drop(columns=['fsh','fenwick','xg_fen','xg','g','finishing']).sort_values(['Player','Season','Team','ID'])
        #Return: skater stats with shooting impacts
        return df
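
# --- Worked check of the goal-composition identity used above (editor's
# addition; not part of the deleted module). goal_comp returns
#   F*fsh + (xG/F - fsh)*F + (G - xG) = G
# when fed a row's own values, so replacing one input with its league average
# produces a counterfactual goal rate, and actual G/60 minus that
# counterfactual is the impact credited to the replaced factor.
# Hypothetical per-60 numbers: F=40, xG=3.2 (so xG/F=0.08), G=3.5, fsh=0.06:
#   rate = 40*0.06        = 2.40
#   qual = (0.08-0.06)*40 = 0.80
#   fini = 3.5-3.2        = 0.30
#   total                 = 3.50 = G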

def nhl_calculate_stats(pbp, type, season_types, game_strength, split_game=False, roster_path=default_roster, shot_impact=False):
    #Given play-by-play, seasonal information, game strength, rosters, and the xG model, return aggregated stats
    # param 'pbp' - play-by-play dataframe
    # param 'type' - type of stats to calculate ('skater', 'goalie', or 'team')
    # param 'season_types' - list of season types (preseason, regular season, or playoffs) to include in aggregation
    # param 'game_strength' - list of game strengths to include in aggregation
    # param 'split_game' - boolean which groups aggregation by game if true
    # param 'roster_path' - path to roster file
    # param 'shot_impact' - boolean determining whether the shot impact model will be applied to the dataset

    print(f"Calculating statistics for all games in the provided play-by-play data at {game_strength} for {type}s...\nSeasons included: {pbp['season'].drop_duplicates().to_list()}...")
    start = time.perf_counter()

    #Check whether the xG column exists and apply the model if it does not
    if 'xG' not in pbp.columns:
        pbp = wsba_xG(pbp)

    #Apply season_type filter
    pbp = pbp.loc[(pbp['season_type'].isin(season_types))]

    #Convert all columns with player ids to float in order to avoid merging errors
    for col in get_col():
        if "_id" in col:
            try:
                pbp[col] = pbp[col].astype(float)
            except KeyError:
                continue

    #Split by game if specified
    if split_game:
        second_group = ['season','game_id']
    else:
        second_group = ['season']

    #Split calculation
    if type == 'goalie':
        complete = calc_goalie(pbp, game_strength, second_group)

        #Set TOI to minutes
        complete['TOI'] = complete['TOI']/60

        #Add per-60 stats
        for stat in ['FF','FA','xGF','xGA','GF','GA','CF','CA','GSAx']:
            complete[f'{stat}/60'] = (complete[stat]/complete['TOI'])*60

        complete['GF%'] = complete['GF']/(complete['GF']+complete['GA'])
        complete['xGF%'] = complete['xGF']/(complete['xGF']+complete['xGA'])
        complete['FF%'] = complete['FF']/(complete['FF']+complete['FA'])
        complete['CF%'] = complete['CF']/(complete['CF']+complete['CA'])

        #Remove entries with no ID listed
        complete = complete.loc[complete['ID'].notna()]

        #Import rosters and player info
        rosters = pd.read_csv(roster_path)
        names = rosters[['id','fullName',
                         'headshot','positionCode','shootsCatches',
                         'heightInInches','weightInPounds',
                         'birthDate','birthCountry']].drop_duplicates(subset=['id','fullName'], keep='last')

        #Add names
        complete = pd.merge(complete, names, how='left', left_on='ID', right_on='id')

        #Rename player info columns
        complete = complete.rename(columns={'fullName':'Goalie',
                                            'headshot':'Headshot',
                                            'positionCode':'Position',
                                            'shootsCatches':'Handedness',
                                            'heightInInches':'Height (in)',
                                            'weightInPounds':'Weight (lbs)',
                                            'birthDate':'Birthday',
                                            'birthCountry':'Nationality'})

        #WSBA ID
        complete['WSBA'] = complete['Goalie']+complete['Team']+complete['Season'].astype(str)

        #Add player age
        complete['Birthday'] = pd.to_datetime(complete['Birthday'])
        complete['season_year'] = complete['Season'].astype(str).str[4:8].astype(int)
        complete['Age'] = complete['season_year'] - complete['Birthday'].dt.year

        #Find player headshot
        complete['Headshot'] = 'https://assets.nhle.com/mugs/nhl/'+complete['Season'].astype(str)+'/'+complete['Team']+'/'+complete['ID'].astype(int).astype(str)+'.png'

        head = ['Goalie','ID','Game'] if 'Game' in complete.columns else ['Goalie','ID']
        complete = complete[head+[
            "Season","Team",'WSBA',
            'Headshot','Position','Handedness',
            'Height (in)','Weight (lbs)',
            'Birthday','Age','Nationality',
            'GP','TOI',
            "GF","FF","xGF","xGF/FF","GF/xGF","FshF%",
            "GA","FA","xGA","xGA/FA","GA/xGA","FshA%",
            'CF','CA',
            'GSAx',
            'RushF','RushA','RushFxG','RushAxG','RushFG','RushAG'
        ]+[f'{stat}/60' for stat in ['FF','FA','xGF','xGA','GF','GA','CF','CA','GSAx']]]

        #Apply shot impacts if necessary
        if shot_impact:
            complete = nhl_shooting_impacts(complete, 'goalie')

        end = time.perf_counter()
        length = end-start
        print(f"...finished in {(length if length < 60 else length/60):.2f} {'seconds' if length < 60 else 'minutes'}.")

        return complete

    elif type == 'team':
        complete = calc_team(pbp, game_strength, second_group)

        #WSBA ID
        complete['WSBA'] = complete['Team']+complete['Season'].astype(str)

        #Set TOI to minutes
        complete['TOI'] = complete['TOI']/60

        #Add per-60 stats
        for stat in per_sixty[10:len(per_sixty)]:
            complete[f'{stat}/60'] = (complete[stat]/complete['TOI'])*60

        complete['GF%'] = complete['GF']/(complete['GF']+complete['GA'])
        complete['xGF%'] = complete['xGF']/(complete['xGF']+complete['xGA'])
        complete['FF%'] = complete['FF']/(complete['FF']+complete['FA'])
        complete['CF%'] = complete['CF']/(complete['CF']+complete['CA'])

        head = ['Team','Game'] if 'Game' in complete.columns else ['Team']
        complete = complete[head+[
            'Season','WSBA',
            'GP','TOI',
            "GF","FF","xGF","xGF/FF","GF/xGF","FshF%",
            "GA","FA","xGA","xGA/FA","GA/xGA","FshA%",
            'CF','CA',
            'GF%','FF%','xGF%','CF%',
            'HF','HA','HF%',
            'Penl','Penl2','Penl5','PIM','Draw','PENL%',
            'Give','Take','PM%',
            'Block',
            'RushF','RushA','RushFxG','RushAxG','RushFG','RushAG'
        ]+[f'{stat}/60' for stat in per_sixty[10:len(per_sixty)]]]
        #Apply shot impacts if necessary
        if shot_impact:
            complete = nhl_shooting_impacts(complete, 'team')

        end = time.perf_counter()
        length = end-start
        print(f"...finished in {(length if length < 60 else length/60):.2f} {'seconds' if length < 60 else 'minutes'}.")

        return complete
    else:
        indv_stats = calc_indv(pbp, game_strength, second_group)
        onice_stats = calc_onice(pbp, game_strength, second_group)

        #IDs are sometimes set as objects
        indv_stats['ID'] = indv_stats['ID'].astype(float)
        onice_stats['ID'] = onice_stats['ID'].astype(float)

        #Merge and add columns for extra stats
        complete = pd.merge(indv_stats, onice_stats, how="outer", on=['ID','Team','Season']+(['Game'] if 'game_id' in second_group else []))
        complete['GC%'] = complete['Gi']/complete['GF']
        complete['AC%'] = (complete['A1']+complete['A2'])/complete['GF']
        complete['GI%'] = (complete['Gi']+complete['A1']+complete['A2'])/complete['GF']
        complete['FC%'] = complete['Fi']/complete['FF']
        complete['xGC%'] = complete['xGi']/complete['xGF']
        complete['GF%'] = complete['GF']/(complete['GF']+complete['GA'])
        complete['xGF%'] = complete['xGF']/(complete['xGF']+complete['xGA'])
        complete['FF%'] = complete['FF']/(complete['FF']+complete['FA'])
        complete['CF%'] = complete['CF']/(complete['CF']+complete['CA'])

        #Remove entries with no ID listed
        complete = complete.loc[complete['ID'].notna()]

        #Import rosters and player info
        rosters = pd.read_csv(roster_path)
        names = rosters[['id','fullName',
                         'headshot','positionCode','shootsCatches',
                         'heightInInches','weightInPounds',
                         'birthDate','birthCountry']].drop_duplicates(subset=['id','fullName'], keep='last')

        #Add names
        complete = pd.merge(complete, names, how='left', left_on='ID', right_on='id')

        #Rename player info columns
        complete = complete.rename(columns={'fullName':'Player',
                                            'headshot':'Headshot',
                                            'positionCode':'Position',
                                            'shootsCatches':'Handedness',
                                            'heightInInches':'Height (in)',
                                            'weightInPounds':'Weight (lbs)',
                                            'birthDate':'Birthday',
                                            'birthCountry':'Nationality'})

        #Set TOI to minutes
        complete['TOI'] = complete['TOI']/60

        #Add player age
        complete['Birthday'] = pd.to_datetime(complete['Birthday'])
        complete['season_year'] = complete['Season'].astype(str).str[4:8].astype(int)
        complete['Age'] = complete['season_year'] - complete['Birthday'].dt.year

        #Find player headshot
        complete['Headshot'] = 'https://assets.nhle.com/mugs/nhl/'+complete['Season'].astype(str)+'/'+complete['Team']+'/'+complete['ID'].astype(int).astype(str)+'.png'

        #Remove goalies that occasionally appear in a set
        complete = complete.loc[complete['Position']!='G']
        #Add WSBA ID
        complete['WSBA'] = complete['Player']+complete['Season'].astype(str)+complete['Team']

        #Add per-60 stats
        for stat in per_sixty:
            complete[f'{stat}/60'] = (complete[stat]/complete['TOI'])*60

        #Shot type metrics
        type_metrics = []
        #Loop variable renamed from 'type' to avoid shadowing the function parameter
        for shot_type in shot_types:
            for stat in per_sixty[:3]:
                type_metrics.append(f'{shot_type.capitalize()}{stat}')

        head = ['Player','ID','Game'] if 'Game' in complete.columns else ['Player','ID']
        complete = complete[head+[
            "Season","Team",'WSBA',
            'Headshot','Position','Handedness',
            'Height (in)','Weight (lbs)',
            'Birthday','Age','Nationality',
            'GP','TOI',
            "Gi","A1","A2",'P1','P',
            'Give','Take','PM%','HF','HA','HF%',
            "Fi","xGi",'xGi/Fi',"Gi/xGi","Fshi%",
            "GF","FF","xGF","xGF/FF","GF/xGF","FshF%",
            "GA","FA","xGA","xGA/FA","GA/xGA","FshA%",
            'Ci','CF','CA','CF%',
            'FF%','xGF%','GF%',
            'Rush',"Rush xG",'Rush G',"GC%","AC%","GI%","FC%","xGC%",
            'F','FW','FL','F%',
            'Penl','Penl2','Penl5',
            'Draw','PIM','PENL%',
            'Block',
            'OZF','NZF','DZF',
            'OZF%','NZF%','DZF%',
        ]+[f'{stat}/60' for stat in per_sixty]+type_metrics].fillna(0).sort_values(['Player','Season','Team','ID'])

        #Apply shot impacts if necessary (note: impacts are only computed for skaters with at least 150 minutes of TOI, per the shot impact TOI rule)
        if shot_impact:
            complete = nhl_shooting_impacts(complete, 'skater')

        end = time.perf_counter()
        length = end-start
        print(f"...finished in {(length if length < 60 else length/60):.2f} {'seconds' if length < 60 else 'minutes'}.")

        return complete
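
# --- Illustrative usage sketch (editor's addition; not part of the deleted
# module). The '5v5' strength value is a guess at the expected format:
#
#   pbp = nhl_scrape_season('20232024', season_types=[2], local=True)
#   skaters = nhl_calculate_stats(pbp, 'skater', [2], ['5v5'], shot_impact=True)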

def nhl_plot_skaters_shots(pbp, skater_dict, strengths, marker_dict=event_markers, onice='indv', title=True, legend=False):
    #Returns dict of plots for specified skaters
    # param 'pbp' - pbp to plot data from
    # param 'skater_dict' - skaters to plot shots for (format: {'Patrice Bergeron':['20242025','BOS']})
    # param 'strengths' - strengths to include in plotting
    # param 'marker_dict' - dict with markers to use for events
    # param 'onice' - sets which shots to include in plotting for the specified skater ('indv', 'for', 'against')
    # param 'title' - bool which includes title when true
    # param 'legend' - bool which includes legend when true

    print(f'Plotting the following skater shots: {skater_dict}...')

    #Iterate through skaters, adding plots to dict
    skater_plots = {}
    for skater in skater_dict.keys():
        skater_info = skater_dict[skater]
        plot_title = f'{skater} Fenwick Shots for {skater_info[1]} in {skater_info[0][2:4]}-{skater_info[0][6:8]}' if title else ''
        #Key is formatted as PLAYERSEASONTEAM (i.e. PATRICE BERGERON20212022BOS)
        skater_plots.update({f'{skater}{skater_info[0]}{skater_info[1]}': [plot_skater_shots(pbp, skater, skater_info[0], skater_info[1], strengths, plot_title, marker_dict, onice, legend)]})

    #Return: dict of plotted skater shot charts
    return skater_plots

def nhl_plot_games(pbp, events, strengths, game_ids='all', marker_dict=event_markers, team_colors={'away':'primary','home':'primary'}, legend=False):
    #Returns dict of plots for specified games
    # param 'pbp' - pbp to plot data from
    # param 'events' - types of events to plot
    # param 'strengths' - strengths to include in plotting
    # param 'game_ids' - games to plot (list if not set to 'all')
    # param 'marker_dict' - dict with markers to use for events
    # param 'team_colors' - dict with colors to use for the away and home teams
    # param 'legend' - bool which includes legend when true

    #Find games to plot
    if game_ids == 'all':
        game_ids = pbp['game_id'].drop_duplicates().to_list()

    print(f'Plotting the following games: {game_ids}...')

    game_plots = {}
    #Iterate through games, adding plots to dict
    for game in game_ids:
        game_plots.update({game: [plot_game_events(pbp, game, events, strengths, marker_dict, team_colors, legend)]})

    #Return: dict of plotted game events
    return game_plots
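
# --- Illustrative usage sketch (editor's addition; not part of the deleted
# module). Keys follow the PLAYERSEASONTEAM format noted above; the '5v5'
# strength value is a guess at the expected format:
#
#   plots = nhl_plot_skaters_shots(pbp, {'Patrice Bergeron': ['20212022', 'BOS']}, ['5v5'])
#   fig = plots['Patrice Bergeron20212022BOS'][0]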

def repo_load_rosters(seasons=[]):
    #Returns roster data from repository
    # param 'seasons' - list of seasons to include

    data = pd.read_csv(default_roster)
    if len(seasons) > 0:
        data = data.loc[data['season'].isin(seasons)]

    return data

def repo_load_schedule(seasons=[]):
    #Returns schedule data from repository
    # param 'seasons' - list of seasons to include

    data = pd.read_csv(schedule_path)
    if len(seasons) > 0:
        data = data.loc[data['season'].isin(seasons)]

    return data

def repo_load_teaminfo():
    #Returns team data from repository

    return pd.read_csv(info_path)

def repo_load_pbp(seasons=[]):
    #Returns play-by-play data from repository
    # param 'seasons' - list of seasons to include

    #Add parquet to total
    print(f'Loading play-by-play from the following seasons: {seasons}...')
    dfs = [pd.read_parquet(f"https://weakside-breakout.s3.us-east-2.amazonaws.com/pbp/{season}.parquet") for season in seasons]

    return pd.concat(dfs)

def repo_load_seasons():
    #List of available seasons to scrape

    return seasons