wsba-hockey 1.0.3__py3-none-any.whl → 1.0.5__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- wsba_hockey/__init__.py +1 -1
- wsba_hockey/data_pipelines.py +183 -0
- wsba_hockey/evidence/weakside-breakout/node_modules/duckdb/vendor.py +146 -0
- wsba_hockey/evidence/weakside-breakout/node_modules/flatted/python/flatted.py +149 -0
- wsba_hockey/evidence/weakside-breakout/node_modules/flatted/python/test.py +63 -0
- wsba_hockey/evidence/weakside-breakout/node_modules/node-gyp/gyp/gyp_main.py +45 -0
- wsba_hockey/evidence/weakside-breakout/node_modules/node-gyp/gyp/pylib/gyp/MSVSNew.py +367 -0
- wsba_hockey/evidence/weakside-breakout/node_modules/node-gyp/gyp/pylib/gyp/MSVSProject.py +206 -0
- wsba_hockey/evidence/weakside-breakout/node_modules/node-gyp/gyp/pylib/gyp/MSVSSettings.py +1270 -0
- wsba_hockey/evidence/weakside-breakout/node_modules/node-gyp/gyp/pylib/gyp/MSVSSettings_test.py +1547 -0
- wsba_hockey/evidence/weakside-breakout/node_modules/node-gyp/gyp/pylib/gyp/MSVSToolFile.py +59 -0
- wsba_hockey/evidence/weakside-breakout/node_modules/node-gyp/gyp/pylib/gyp/MSVSUserFile.py +153 -0
- wsba_hockey/evidence/weakside-breakout/node_modules/node-gyp/gyp/pylib/gyp/MSVSUtil.py +271 -0
- wsba_hockey/evidence/weakside-breakout/node_modules/node-gyp/gyp/pylib/gyp/MSVSVersion.py +574 -0
- wsba_hockey/evidence/weakside-breakout/node_modules/node-gyp/gyp/pylib/gyp/__init__.py +690 -0
- wsba_hockey/evidence/weakside-breakout/node_modules/node-gyp/gyp/pylib/gyp/common.py +661 -0
- wsba_hockey/evidence/weakside-breakout/node_modules/node-gyp/gyp/pylib/gyp/common_test.py +78 -0
- wsba_hockey/evidence/weakside-breakout/node_modules/node-gyp/gyp/pylib/gyp/easy_xml.py +165 -0
- wsba_hockey/evidence/weakside-breakout/node_modules/node-gyp/gyp/pylib/gyp/easy_xml_test.py +109 -0
- wsba_hockey/evidence/weakside-breakout/node_modules/node-gyp/gyp/pylib/gyp/flock_tool.py +55 -0
- wsba_hockey/evidence/weakside-breakout/node_modules/node-gyp/gyp/pylib/gyp/generator/__init__.py +0 -0
- wsba_hockey/evidence/weakside-breakout/node_modules/node-gyp/gyp/pylib/gyp/generator/analyzer.py +808 -0
- wsba_hockey/evidence/weakside-breakout/node_modules/node-gyp/gyp/pylib/gyp/generator/android.py +1173 -0
- wsba_hockey/evidence/weakside-breakout/node_modules/node-gyp/gyp/pylib/gyp/generator/cmake.py +1321 -0
- wsba_hockey/evidence/weakside-breakout/node_modules/node-gyp/gyp/pylib/gyp/generator/compile_commands_json.py +120 -0
- wsba_hockey/evidence/weakside-breakout/node_modules/node-gyp/gyp/pylib/gyp/generator/dump_dependency_json.py +103 -0
- wsba_hockey/evidence/weakside-breakout/node_modules/node-gyp/gyp/pylib/gyp/generator/eclipse.py +464 -0
- wsba_hockey/evidence/weakside-breakout/node_modules/node-gyp/gyp/pylib/gyp/generator/gypd.py +89 -0
- wsba_hockey/evidence/weakside-breakout/node_modules/node-gyp/gyp/pylib/gyp/generator/gypsh.py +58 -0
- wsba_hockey/evidence/weakside-breakout/node_modules/node-gyp/gyp/pylib/gyp/generator/make.py +2714 -0
- wsba_hockey/evidence/weakside-breakout/node_modules/node-gyp/gyp/pylib/gyp/generator/msvs.py +3981 -0
- wsba_hockey/evidence/weakside-breakout/node_modules/node-gyp/gyp/pylib/gyp/generator/msvs_test.py +44 -0
- wsba_hockey/evidence/weakside-breakout/node_modules/node-gyp/gyp/pylib/gyp/generator/ninja.py +2936 -0
- wsba_hockey/evidence/weakside-breakout/node_modules/node-gyp/gyp/pylib/gyp/generator/ninja_test.py +55 -0
- wsba_hockey/evidence/weakside-breakout/node_modules/node-gyp/gyp/pylib/gyp/generator/xcode.py +1394 -0
- wsba_hockey/evidence/weakside-breakout/node_modules/node-gyp/gyp/pylib/gyp/generator/xcode_test.py +25 -0
- wsba_hockey/evidence/weakside-breakout/node_modules/node-gyp/gyp/pylib/gyp/input.py +3130 -0
- wsba_hockey/evidence/weakside-breakout/node_modules/node-gyp/gyp/pylib/gyp/input_test.py +98 -0
- wsba_hockey/evidence/weakside-breakout/node_modules/node-gyp/gyp/pylib/gyp/mac_tool.py +771 -0
- wsba_hockey/evidence/weakside-breakout/node_modules/node-gyp/gyp/pylib/gyp/msvs_emulation.py +1271 -0
- wsba_hockey/evidence/weakside-breakout/node_modules/node-gyp/gyp/pylib/gyp/ninja_syntax.py +174 -0
- wsba_hockey/evidence/weakside-breakout/node_modules/node-gyp/gyp/pylib/gyp/simple_copy.py +61 -0
- wsba_hockey/evidence/weakside-breakout/node_modules/node-gyp/gyp/pylib/gyp/win_tool.py +374 -0
- wsba_hockey/evidence/weakside-breakout/node_modules/node-gyp/gyp/pylib/gyp/xcode_emulation.py +1939 -0
- wsba_hockey/evidence/weakside-breakout/node_modules/node-gyp/gyp/pylib/gyp/xcode_ninja.py +302 -0
- wsba_hockey/evidence/weakside-breakout/node_modules/node-gyp/gyp/pylib/gyp/xcodeproj_file.py +3197 -0
- wsba_hockey/evidence/weakside-breakout/node_modules/node-gyp/gyp/pylib/gyp/xml_fix.py +65 -0
- wsba_hockey/evidence/weakside-breakout/node_modules/node-gyp/gyp/test_gyp.py +261 -0
- wsba_hockey/evidence/weakside-breakout/node_modules/node-gyp/gyp/tools/graphviz.py +102 -0
- wsba_hockey/evidence/weakside-breakout/node_modules/node-gyp/gyp/tools/pretty_gyp.py +156 -0
- wsba_hockey/evidence/weakside-breakout/node_modules/node-gyp/gyp/tools/pretty_sln.py +181 -0
- wsba_hockey/evidence/weakside-breakout/node_modules/node-gyp/gyp/tools/pretty_vcproj.py +339 -0
- wsba_hockey/evidence/weakside-breakout/node_modules/node-gyp/test/fixtures/test-charmap.py +31 -0
- wsba_hockey/evidence/weakside-breakout/node_modules/node-gyp/update-gyp.py +64 -0
- wsba_hockey/evidence/weakside-breakout/node_modules/sqlite3/node_modules/node-gyp/gyp/gyp_main.py +45 -0
- wsba_hockey/evidence/weakside-breakout/node_modules/sqlite3/node_modules/node-gyp/gyp/pylib/gyp/MSVSNew.py +367 -0
- wsba_hockey/evidence/weakside-breakout/node_modules/sqlite3/node_modules/node-gyp/gyp/pylib/gyp/MSVSProject.py +206 -0
- wsba_hockey/evidence/weakside-breakout/node_modules/sqlite3/node_modules/node-gyp/gyp/pylib/gyp/MSVSSettings.py +1270 -0
- wsba_hockey/evidence/weakside-breakout/node_modules/sqlite3/node_modules/node-gyp/gyp/pylib/gyp/MSVSSettings_test.py +1547 -0
- wsba_hockey/evidence/weakside-breakout/node_modules/sqlite3/node_modules/node-gyp/gyp/pylib/gyp/MSVSToolFile.py +59 -0
- wsba_hockey/evidence/weakside-breakout/node_modules/sqlite3/node_modules/node-gyp/gyp/pylib/gyp/MSVSUserFile.py +153 -0
- wsba_hockey/evidence/weakside-breakout/node_modules/sqlite3/node_modules/node-gyp/gyp/pylib/gyp/MSVSUtil.py +271 -0
- wsba_hockey/evidence/weakside-breakout/node_modules/sqlite3/node_modules/node-gyp/gyp/pylib/gyp/MSVSVersion.py +574 -0
- wsba_hockey/evidence/weakside-breakout/node_modules/sqlite3/node_modules/node-gyp/gyp/pylib/gyp/__init__.py +666 -0
- wsba_hockey/evidence/weakside-breakout/node_modules/sqlite3/node_modules/node-gyp/gyp/pylib/gyp/common.py +654 -0
- wsba_hockey/evidence/weakside-breakout/node_modules/sqlite3/node_modules/node-gyp/gyp/pylib/gyp/common_test.py +78 -0
- wsba_hockey/evidence/weakside-breakout/node_modules/sqlite3/node_modules/node-gyp/gyp/pylib/gyp/easy_xml.py +165 -0
- wsba_hockey/evidence/weakside-breakout/node_modules/sqlite3/node_modules/node-gyp/gyp/pylib/gyp/easy_xml_test.py +109 -0
- wsba_hockey/evidence/weakside-breakout/node_modules/sqlite3/node_modules/node-gyp/gyp/pylib/gyp/flock_tool.py +55 -0
- wsba_hockey/evidence/weakside-breakout/node_modules/sqlite3/node_modules/node-gyp/gyp/pylib/gyp/generator/__init__.py +0 -0
- wsba_hockey/evidence/weakside-breakout/node_modules/sqlite3/node_modules/node-gyp/gyp/pylib/gyp/generator/analyzer.py +808 -0
- wsba_hockey/evidence/weakside-breakout/node_modules/sqlite3/node_modules/node-gyp/gyp/pylib/gyp/generator/android.py +1173 -0
- wsba_hockey/evidence/weakside-breakout/node_modules/sqlite3/node_modules/node-gyp/gyp/pylib/gyp/generator/cmake.py +1321 -0
- wsba_hockey/evidence/weakside-breakout/node_modules/sqlite3/node_modules/node-gyp/gyp/pylib/gyp/generator/compile_commands_json.py +120 -0
- wsba_hockey/evidence/weakside-breakout/node_modules/sqlite3/node_modules/node-gyp/gyp/pylib/gyp/generator/dump_dependency_json.py +103 -0
- wsba_hockey/evidence/weakside-breakout/node_modules/sqlite3/node_modules/node-gyp/gyp/pylib/gyp/generator/eclipse.py +464 -0
- wsba_hockey/evidence/weakside-breakout/node_modules/sqlite3/node_modules/node-gyp/gyp/pylib/gyp/generator/gypd.py +89 -0
- wsba_hockey/evidence/weakside-breakout/node_modules/sqlite3/node_modules/node-gyp/gyp/pylib/gyp/generator/gypsh.py +58 -0
- wsba_hockey/evidence/weakside-breakout/node_modules/sqlite3/node_modules/node-gyp/gyp/pylib/gyp/generator/make.py +2518 -0
- wsba_hockey/evidence/weakside-breakout/node_modules/sqlite3/node_modules/node-gyp/gyp/pylib/gyp/generator/msvs.py +3978 -0
- wsba_hockey/evidence/weakside-breakout/node_modules/sqlite3/node_modules/node-gyp/gyp/pylib/gyp/generator/msvs_test.py +44 -0
- wsba_hockey/evidence/weakside-breakout/node_modules/sqlite3/node_modules/node-gyp/gyp/pylib/gyp/generator/ninja.py +2936 -0
- wsba_hockey/evidence/weakside-breakout/node_modules/sqlite3/node_modules/node-gyp/gyp/pylib/gyp/generator/ninja_test.py +55 -0
- wsba_hockey/evidence/weakside-breakout/node_modules/sqlite3/node_modules/node-gyp/gyp/pylib/gyp/generator/xcode.py +1394 -0
- wsba_hockey/evidence/weakside-breakout/node_modules/sqlite3/node_modules/node-gyp/gyp/pylib/gyp/generator/xcode_test.py +25 -0
- wsba_hockey/evidence/weakside-breakout/node_modules/sqlite3/node_modules/node-gyp/gyp/pylib/gyp/input.py +3137 -0
- wsba_hockey/evidence/weakside-breakout/node_modules/sqlite3/node_modules/node-gyp/gyp/pylib/gyp/input_test.py +98 -0
- wsba_hockey/evidence/weakside-breakout/node_modules/sqlite3/node_modules/node-gyp/gyp/pylib/gyp/mac_tool.py +771 -0
- wsba_hockey/evidence/weakside-breakout/node_modules/sqlite3/node_modules/node-gyp/gyp/pylib/gyp/msvs_emulation.py +1271 -0
- wsba_hockey/evidence/weakside-breakout/node_modules/sqlite3/node_modules/node-gyp/gyp/pylib/gyp/ninja_syntax.py +174 -0
- wsba_hockey/evidence/weakside-breakout/node_modules/sqlite3/node_modules/node-gyp/gyp/pylib/gyp/simple_copy.py +61 -0
- wsba_hockey/evidence/weakside-breakout/node_modules/sqlite3/node_modules/node-gyp/gyp/pylib/gyp/win_tool.py +374 -0
- wsba_hockey/evidence/weakside-breakout/node_modules/sqlite3/node_modules/node-gyp/gyp/pylib/gyp/xcode_emulation.py +1939 -0
- wsba_hockey/evidence/weakside-breakout/node_modules/sqlite3/node_modules/node-gyp/gyp/pylib/gyp/xcode_ninja.py +302 -0
- wsba_hockey/evidence/weakside-breakout/node_modules/sqlite3/node_modules/node-gyp/gyp/pylib/gyp/xcodeproj_file.py +3197 -0
- wsba_hockey/evidence/weakside-breakout/node_modules/sqlite3/node_modules/node-gyp/gyp/pylib/gyp/xml_fix.py +65 -0
- wsba_hockey/evidence/weakside-breakout/node_modules/sqlite3/node_modules/node-gyp/gyp/setup.py +42 -0
- wsba_hockey/evidence/weakside-breakout/node_modules/sqlite3/node_modules/node-gyp/gyp/test_gyp.py +260 -0
- wsba_hockey/evidence/weakside-breakout/node_modules/sqlite3/node_modules/node-gyp/gyp/tools/graphviz.py +102 -0
- wsba_hockey/evidence/weakside-breakout/node_modules/sqlite3/node_modules/node-gyp/gyp/tools/pretty_gyp.py +156 -0
- wsba_hockey/evidence/weakside-breakout/node_modules/sqlite3/node_modules/node-gyp/gyp/tools/pretty_sln.py +181 -0
- wsba_hockey/evidence/weakside-breakout/node_modules/sqlite3/node_modules/node-gyp/gyp/tools/pretty_vcproj.py +339 -0
- wsba_hockey/evidence/weakside-breakout/node_modules/sqlite3/node_modules/node-gyp/test/fixtures/test-charmap.py +31 -0
- wsba_hockey/evidence/weakside-breakout/node_modules/sqlite3/node_modules/node-gyp/update-gyp.py +46 -0
- wsba_hockey/evidence/weakside-breakout/wsba_nhl_apps/wsba_nhl_apps/game_stats/app.py +400 -0
- wsba_hockey/evidence/weakside-breakout/wsba_nhl_apps/wsba_nhl_apps/game_stats/name_fix.py +47 -0
- wsba_hockey/evidence/weakside-breakout/wsba_nhl_apps/wsba_nhl_apps/heatmaps/app.py +108 -0
- wsba_hockey/evidence/weakside-breakout/wsba_nhl_apps/wsba_nhl_apps/heatmaps/plot.py +93 -0
- wsba_hockey/evidence/weakside-breakout/wsba_nhl_apps/wsba_nhl_apps/heatmaps/rink_plot.py +245 -0
- wsba_hockey/evidence/weakside-breakout/wsba_nhl_apps/wsba_nhl_apps/matchups/app.py +145 -0
- wsba_hockey/evidence/weakside-breakout/wsba_nhl_apps/wsba_nhl_apps/matchups/plot.py +77 -0
- wsba_hockey/evidence/weakside-breakout/wsba_nhl_apps/wsba_nhl_apps/matchups/rink_plot.py +245 -0
- wsba_hockey/evidence/weakside-breakout/wsba_nhl_apps/wsba_nhl_apps/pbp/app.py +389 -0
- wsba_hockey/evidence/weakside-breakout/wsba_nhl_apps/wsba_nhl_apps/pbp/plot.py +70 -0
- wsba_hockey/evidence/weakside-breakout/wsba_nhl_apps/wsba_nhl_apps/pbp/rink_plot.py +245 -0
- wsba_hockey/evidence/weakside-breakout/wsba_nhl_apps/wsba_nhl_apps/skater/app.py +110 -0
- wsba_hockey/evidence/weakside-breakout/wsba_nhl_apps/wsba_nhl_apps/skater/plot.py +58 -0
- wsba_hockey/evidence/weakside-breakout/wsba_nhl_apps/wsba_nhl_apps/skater/rink_plot.py +245 -0
- wsba_hockey/tools/agg.py +243 -54
- wsba_hockey/tools/plotting.py +25 -25
- wsba_hockey/tools/scraping.py +154 -263
- wsba_hockey/tools/xg_model.py +369 -315
- wsba_hockey/workspace.py +22 -117
- wsba_hockey/wsba_main.py +499 -167
- {wsba_hockey-1.0.3.dist-info → wsba_hockey-1.0.5.dist-info}/METADATA +1 -1
- wsba_hockey-1.0.5.dist-info/RECORD +135 -0
- {wsba_hockey-1.0.3.dist-info → wsba_hockey-1.0.5.dist-info}/WHEEL +1 -1
- wsba_hockey/stats/calculate_viz/shot_impact.py +0 -2
- wsba_hockey-1.0.3.dist-info/RECORD +0 -19
- {wsba_hockey-1.0.3.dist-info → wsba_hockey-1.0.5.dist-info}/licenses/LICENSE +0 -0
- {wsba_hockey-1.0.3.dist-info → wsba_hockey-1.0.5.dist-info}/top_level.txt +0 -0
wsba_hockey/wsba_main.py
CHANGED
@@ -1,9 +1,9 @@
|
|
1
|
+
import random
|
2
|
+
import os
|
1
3
|
import requests as rs
|
2
4
|
import pandas as pd
|
3
|
-
import numpy as np
|
4
|
-
from datetime import datetime, timedelta, date
|
5
5
|
import time
|
6
|
-
import
|
6
|
+
from datetime import datetime, timedelta, date
|
7
7
|
from .tools.scraping import *
|
8
8
|
from .tools.xg_model import *
|
9
9
|
from .tools.agg import *
|
@@ -59,11 +59,11 @@ convert_team_abbr = {'L.A':'LAK',
|
|
59
59
|
'T.B':'TBL',
|
60
60
|
'PHX':'ARI'}
|
61
61
|
|
62
|
-
per_sixty = ['Fi','xGi','Gi','A1','A2','P1','P','FF','FA','xGF','xGA','GF','GA']
|
62
|
+
per_sixty = ['Fi','xGi','Gi','A1','A2','P1','P','OZF','NZF','DZF','FF','FA','xGF','xGA','GF','GA','CF','CA','HF','HA','Give','Take','Penl','Penl2','Penl5','Draw','Block']
|
63
63
|
|
64
64
|
#Some games in the API are specifically known to cause errors in scraping.
|
65
65
|
#This list is updated as frequently as necessary
|
66
|
-
known_probs ={
|
66
|
+
known_probs = {
|
67
67
|
'2007020011':'Missing shifts data for game between Chicago and Minnesota.',
|
68
68
|
'2007021178':'Game between the Bruins and Sabres is missing data after the second period, for some reason.',
|
69
69
|
'2008020259':'HTML data is completely missing for this game.',
|
@@ -73,15 +73,12 @@ known_probs ={
|
|
73
73
|
'2009020658':'Missing shifts data for game between New York Islanders and Dallas.',
|
74
74
|
'2009020885':'Missing shifts data for game between Sharks and Blue Jackets.',
|
75
75
|
'2010020124':'Game between Capitals and Hurricanes is sporadically missing player on-ice data',
|
76
|
+
'2012020018':'HTML events contain mislabeled events.',
|
76
77
|
'2013020971':'On March 10th, 2014, Stars forward Rich Peverley suffered from a cardiac episode midgame and as a result, the remainder of the game was postponed. \nThe game resumed on April 9th, and the only goal scorer in the game, Blue Jackets forward Nathan Horton, did not appear in the resumed game due to injury. Interestingly, Horton would never play in the NHL again.',
|
77
78
|
'2018021133':'Game between Lightning and Capitals has incorrectly labeled event teams (i.e. WSH TAKEAWAY - #71 CIRELLI (Cirelli is a Tampa Bay skater in this game)).',
|
78
79
|
'2019020876':'Due to the frightening collapse of Blues defensemen Jay Bouwmeester, a game on February 2nd, 2020 between the Ducks and Blues was postponed. \nWhen the game resumed, Ducks defensemen Hampus Lindholm, who assisted on a goal in the inital game, did not play in the resumed match.'
|
79
80
|
}
|
80
81
|
|
81
|
-
name_change = {
|
82
|
-
"":"",
|
83
|
-
}
|
84
|
-
|
85
82
|
shot_types = ['wrist','deflected','tip-in','slap','backhand','snap','wrap-around','poke','bat','cradle','between-legs']
|
86
83
|
|
87
84
|
new = 2024
|
@@ -107,14 +104,22 @@ standings_end = {
|
|
107
104
|
'20242025':'04-17'
|
108
105
|
}
|
109
106
|
|
107
|
+
events = ['faceoff','hit','giveaway','takeaway','blocked-shot','missed-shot','shot-on-goal','goal','penalty']
|
108
|
+
|
109
|
+
dir = os.path.dirname(os.path.realpath(__file__))
|
110
|
+
schedule_path = os.path.join(dir,'tools\\schedule\\schedule.csv')
|
111
|
+
info_path = os.path.join(dir,'tools\\teaminfo\\nhl_teaminfo.csv')
|
112
|
+
default_roster = os.path.join(dir,'tools\\rosters\\nhl_rosters.csv')
|
113
|
+
|
110
114
|
## SCRAPE FUNCTIONS ##
|
111
|
-
def nhl_scrape_game(game_ids,split_shifts = False, remove = ['period-start','period-end','challenge','stoppage'],verbose = False, errors = False):
|
115
|
+
def nhl_scrape_game(game_ids,split_shifts = False, remove = ['period-start','period-end','challenge','stoppage','shootout-complete','game-end'],verbose = False, sources = False, errors = False):
|
112
116
|
#Given a set of game_ids (NHL API), return complete play-by-play information as requested
|
113
117
|
# param 'game_ids' - NHL game ids (or list formatted as ['random', num_of_games, start_year, end_year])
|
114
118
|
# param 'split_shifts' - boolean which splits pbp and shift events if true
|
115
119
|
# param 'remove' - list of events to remove from final dataframe
|
116
120
|
# param 'xg' - xG model to apply to pbp for aggregation
|
117
121
|
# param 'verbose' - boolean which adds additional event info if true
|
122
|
+
# param 'sources' - boolean scraping the html and json sources to a master directory if true
|
118
123
|
# param 'errors' - boolean returning game ids which did not scrape if true
|
119
124
|
|
120
125
|
pbps = []
|
@@ -154,6 +159,7 @@ def nhl_scrape_game(game_ids,split_shifts = False, remove = ['period-start','per
|
|
154
159
|
#Scrape each game
|
155
160
|
#Track Errors
|
156
161
|
error_ids = []
|
162
|
+
prog = 0
|
157
163
|
for game_id in game_ids:
|
158
164
|
print("Scraping data from game " + str(game_id) + "...",end="")
|
159
165
|
start = time.perf_counter()
|
@@ -161,15 +167,25 @@ def nhl_scrape_game(game_ids,split_shifts = False, remove = ['period-start','per
|
|
161
167
|
try:
|
162
168
|
#Retrieve data
|
163
169
|
info = get_game_info(game_id)
|
164
|
-
data = combine_data(info)
|
170
|
+
data = combine_data(info, sources)
|
165
171
|
|
166
172
|
#Append data to list
|
167
173
|
pbps.append(data)
|
168
174
|
|
169
175
|
end = time.perf_counter()
|
170
176
|
secs = end - start
|
171
|
-
|
177
|
+
prog += 1
|
178
|
+
|
179
|
+
#Export if sources is true
|
180
|
+
if sources:
|
181
|
+
dirs = f'sources/{info['season']}/'
|
182
|
+
|
183
|
+
if not os.path.exists(dirs):
|
184
|
+
os.makedirs(dirs)
|
172
185
|
|
186
|
+
data.to_csv(f'{dirs}{info['game_id']}.csv',index=False)
|
187
|
+
|
188
|
+
print(f" finished in {secs:.2f} seconds. {prog}/{len(game_ids)} ({(prog/len(game_ids))*100:.2f}%)")
|
173
189
|
except:
|
174
190
|
#Games such as the all-star game and pre-season games will incur this error
|
175
191
|
#Other games have known problems
|
@@ -180,7 +196,7 @@ def nhl_scrape_game(game_ids,split_shifts = False, remove = ['period-start','per
|
|
180
196
|
|
181
197
|
#Track error
|
182
198
|
error_ids.append(game_id)
|
183
|
-
|
199
|
+
|
184
200
|
#Add all pbps together
|
185
201
|
if len(pbps) == 0:
|
186
202
|
print("\rNo data returned.")
|
@@ -256,30 +272,37 @@ def nhl_scrape_schedule(season,start = "09-01", end = "08-01"):
|
|
256
272
|
#Handles dates which are over a year apart
|
257
273
|
day = 365 + day
|
258
274
|
for i in range(day):
|
259
|
-
#For each day, call NHL api and retreive
|
275
|
+
#For each day, call NHL api and retrieve info on all games of selected day
|
260
276
|
inc = start+timedelta(days=i)
|
261
277
|
print("Scraping games on " + str(inc)[:10]+"...")
|
262
278
|
|
263
279
|
get = rs.get(api+str(inc)[:10]).json()
|
264
|
-
gameWeek = list(pd.json_normalize(get['gameWeek'])['games'])[0]
|
265
|
-
|
266
|
-
|
267
|
-
|
268
|
-
|
269
|
-
|
270
|
-
|
271
|
-
|
272
|
-
|
273
|
-
|
274
|
-
|
275
|
-
|
280
|
+
gameWeek = pd.json_normalize(list(pd.json_normalize(get['gameWeek'])['games'])[0])
|
281
|
+
|
282
|
+
#Return nothing if there's nothing
|
283
|
+
if gameWeek.empty:
|
284
|
+
game.append(gameWeek)
|
285
|
+
else:
|
286
|
+
gameWeek['date'] = get['gameWeek'][0]['date']
|
287
|
+
|
288
|
+
gameWeek['season_type'] = gameWeek['gameType']
|
289
|
+
gameWeek['away_team_abbr'] = gameWeek['awayTeam.abbrev']
|
290
|
+
gameWeek['home_team_abbr'] = gameWeek['homeTeam.abbrev']
|
291
|
+
gameWeek['game_title'] = gameWeek['away_team_abbr'] + " @ " + gameWeek['home_team_abbr'] + " - " + gameWeek['date']
|
292
|
+
gameWeek['estStartTime'] = pd.to_datetime(gameWeek['startTimeUTC']).dt.tz_convert('US/Eastern').dt.strftime("%I:%M %p")
|
293
|
+
|
294
|
+
front_col = ['id','season','date','season_type','game_title','away_team_abbr','home_team_abbr','estStartTime']
|
295
|
+
gameWeek = gameWeek[front_col+[col for col in gameWeek.columns.to_list() if col not in front_col]]
|
296
|
+
|
297
|
+
game.append(gameWeek)
|
298
|
+
|
276
299
|
#Concatenate all games
|
277
300
|
df = pd.concat(game)
|
278
301
|
|
279
302
|
#Return: specified schedule data
|
280
303
|
return df
|
281
304
|
|
282
|
-
def nhl_scrape_season(season,split_shifts = False, season_types = [2,3], remove = ['period-start','period-end','game-end','challenge','stoppage'], start = "09-01", end = "08-01", local=False, local_path =
|
305
|
+
def nhl_scrape_season(season,split_shifts = False, season_types = [2,3], remove = ['period-start','period-end','game-end','challenge','stoppage'], start = "09-01", end = "08-01", local=False, local_path = schedule_path, verbose = False, sources = False, errors = False):
|
283
306
|
#Given season, scrape all play-by-play occurring within the season
|
284
307
|
# param 'season' - NHL season to scrape
|
285
308
|
# param 'split_shifts' - boolean which splits pbp and shift events if true
|
@@ -289,12 +312,21 @@ def nhl_scrape_season(season,split_shifts = False, season_types = [2,3], remove
|
|
289
312
|
# param 'local' - boolean indicating whether to use local file to scrape game_ids
|
290
313
|
# param 'local_path' - path of local file
|
291
314
|
# param 'verbose' - boolean which adds additional event info if true
|
315
|
+
# param 'sources' - boolean scraping the html and json sources to a master directory if true
|
292
316
|
# param 'errors' - boolean returning game ids which did not scrape if true
|
293
317
|
|
294
318
|
#Determine whether to use schedule data in repository or to scrape
|
295
|
-
if local
|
319
|
+
if local:
|
296
320
|
load = pd.read_csv(local_path)
|
297
|
-
load
|
321
|
+
load['date'] = pd.to_datetime(load['date'])
|
322
|
+
|
323
|
+
start = f'{(season[0:4] if int(start[0:2])>=9 else season[4:8])}-{int(start[0:2])}-{int(start[3:5])}'
|
324
|
+
end = f'{(season[0:4] if int(end[0:2])>=9 else season[4:8])}-{int(end[0:2])}-{int(end[3:5])}'
|
325
|
+
|
326
|
+
load = load.loc[(load['season'].astype(str)==season)&
|
327
|
+
(load['season_type'].isin(season_types))&
|
328
|
+
(load['date']>=start)&(load['date']<=end)]
|
329
|
+
|
298
330
|
game_ids = list(load['id'].astype(str))
|
299
331
|
else:
|
300
332
|
load = nhl_scrape_schedule(season,start,end)
|
@@ -310,31 +342,17 @@ def nhl_scrape_season(season,split_shifts = False, season_types = [2,3], remove
|
|
310
342
|
start = time.perf_counter()
|
311
343
|
|
312
344
|
#Perform scrape
|
313
|
-
if split_shifts
|
314
|
-
data = nhl_scrape_game(game_ids,split_shifts=True,remove=remove,verbose=verbose,errors=errors)
|
345
|
+
if split_shifts:
|
346
|
+
data = nhl_scrape_game(game_ids,split_shifts=True,remove=remove,verbose=verbose,sources=sources,errors=errors)
|
315
347
|
else:
|
316
|
-
data = nhl_scrape_game(game_ids,remove=remove,verbose=verbose,errors=errors)
|
348
|
+
data = nhl_scrape_game(game_ids,remove=remove,verbose=verbose,sources=sources,errors=errors)
|
317
349
|
|
318
350
|
end = time.perf_counter()
|
319
351
|
secs = end - start
|
320
352
|
|
321
353
|
print(f'Finished season scrape in {(secs/60)/60:.2f} hours.')
|
322
354
|
#Return: Complete pbp and shifts data for specified season as well as dataframe of game_ids which failed to return data
|
323
|
-
|
324
|
-
pbp_dict = {'pbp':data['pbp'],
|
325
|
-
'shifts':data['shifts']}
|
326
|
-
|
327
|
-
if errors:
|
328
|
-
pbp_dict.update({'errors':data['errors']})
|
329
|
-
return pbp_dict
|
330
|
-
else:
|
331
|
-
pbp = data
|
332
|
-
if errors:
|
333
|
-
pbp_dict = {'pbp':pbp,
|
334
|
-
'errors':data['errors']}
|
335
|
-
return pbp_dict
|
336
|
-
else:
|
337
|
-
return pbp
|
355
|
+
return data
|
338
356
|
|
339
357
|
def nhl_scrape_seasons_info(seasons = []):
|
340
358
|
#Returns info related to NHL seasons (by default, all seasons are included)
|
@@ -390,7 +408,7 @@ def nhl_scrape_roster(season):
|
|
390
408
|
#Given a nhl season, return rosters for all participating teams
|
391
409
|
# param 'season' - NHL season to scrape
|
392
410
|
print("Scrpaing rosters for the "+ season + "season...")
|
393
|
-
teaminfo = pd.read_csv(
|
411
|
+
teaminfo = pd.read_csv(info_path)
|
394
412
|
|
395
413
|
rosts = []
|
396
414
|
for team in list(teaminfo['Team']):
|
@@ -449,17 +467,24 @@ def nhl_scrape_team_info(country = False):
|
|
449
467
|
|
450
468
|
return data.sort_values(by=(['country3Code','countryCode','iocCode','countryName'] if country else ['fullName','triCode','id']))
|
451
469
|
|
452
|
-
def nhl_scrape_player_data(
|
470
|
+
def nhl_scrape_player_data(player_ids):
|
453
471
|
#Given player id, return player information
|
454
|
-
|
472
|
+
infos = []
|
473
|
+
for player_id in player_ids:
|
474
|
+
player_id = int(player_id)
|
475
|
+
api = f'https://api-web.nhle.com/v1/player/{player_id}/landing'
|
476
|
+
|
477
|
+
data = pd.json_normalize(rs.get(api).json())
|
455
478
|
|
456
|
-
|
479
|
+
#Add name column
|
480
|
+
data['fullName'] = (data['firstName.default'] + " " + data['lastName.default']).str.upper()
|
457
481
|
|
458
|
-
|
459
|
-
|
482
|
+
#Append
|
483
|
+
infos.append(data)
|
460
484
|
|
485
|
+
df = pd.concat(infos)
|
461
486
|
#Return: player data
|
462
|
-
return
|
487
|
+
return df
|
463
488
|
|
464
489
|
def nhl_scrape_draft_rankings(arg = 'now', category = ''):
|
465
490
|
#Given url argument for timeframe and prospect category, return draft rankings
|
@@ -478,12 +503,24 @@ def nhl_scrape_draft_rankings(arg = 'now', category = ''):
|
|
478
503
|
#Return: prospect rankings
|
479
504
|
return data
|
480
505
|
|
481
|
-
def
|
506
|
+
def nhl_apply_xG(pbp):
|
507
|
+
#Given play-by-play data, return this data with xG-related columns
|
508
|
+
|
509
|
+
#param 'pbp' - play-by-play data
|
510
|
+
|
511
|
+
print(f'Applying WSBA xG to model with seasons: {pbp['season'].drop_duplicates().to_list()}')
|
512
|
+
|
513
|
+
#Apply xG model
|
514
|
+
pbp = wsba_xG(pbp)
|
515
|
+
|
516
|
+
return pbp
|
517
|
+
|
518
|
+
def nhl_shooting_impacts(agg,type):
|
482
519
|
#Given stats table generated from the nhl_calculate_stats function, return table with shot impacts
|
483
520
|
#Only 5v5 is supported as of now
|
484
521
|
|
485
522
|
#param 'agg' - stats table
|
486
|
-
#param '
|
523
|
+
#param 'type' - type of stats to calculate ('skater', 'goalie', or 'team')
|
487
524
|
|
488
525
|
#COMPOSITE IMPACT EVALUATIONS:
|
489
526
|
|
@@ -509,7 +546,7 @@ def nhl_shooting_impacts(agg,team=False):
|
|
509
546
|
|
510
547
|
return rate+qual+fini
|
511
548
|
|
512
|
-
if
|
549
|
+
if type == 'goalie':
|
513
550
|
pos = agg
|
514
551
|
for group in [('OOFF','F'),('ODEF','A')]:
|
515
552
|
#Have to set these columns for compatibility with df.apply
|
@@ -536,35 +573,172 @@ def nhl_shooting_impacts(agg,team=False):
|
|
536
573
|
pos[f'{group[0]}-SRI'] = pos['g'] - pos.apply(lambda x: goal_comp(avg_fen,x.xg_fen,x.xg,x.g,avg_fsh),axis=1)
|
537
574
|
pos[f'{group[0]}-SQI'] = pos['g'] - pos.apply(lambda x: goal_comp(x.fenwick,avg_xg_fen,x.xg,x.g,avg_fsh),axis=1)
|
538
575
|
pos[f'{group[0]}-FNI'] = pos['g'] - pos.apply(lambda x: goal_comp(x.fenwick,x.xg_fen,avg_xg,avg_g,avg_fsh),axis=1)
|
576
|
+
|
577
|
+
#Convert impacts to totals
|
578
|
+
#Calculate shot rate, shot quality, and finishing impacts
|
579
|
+
pos[f'{group[0]}-SRI-T'] = (pos[f'{group[0]}-SRI']/60)*pos['TOI']
|
580
|
+
pos[f'{group[0]}-SQI-T'] = (pos[f'{group[0]}-SQI']/60)*pos['TOI']
|
581
|
+
pos[f'{group[0]}-FNI-T'] = (pos[f'{group[0]}-FNI']/60)*pos['TOI']
|
539
582
|
|
583
|
+
#Rank per 60 stats
|
584
|
+
for stat in ['FF','FA','xGF','xGA','GF','GA','CF','CA','GSAx']:
|
585
|
+
pos[f'{stat}/60-P'] = pos[f'{stat}/60'].rank(pct=True)
|
586
|
+
|
587
|
+
#Flip percentiles for against stats
|
588
|
+
for stat in ['FA','xGA','GA','CA']:
|
589
|
+
pos[f'{stat}/60-P'] = 1-pos[f'{stat}/60-P']
|
590
|
+
|
540
591
|
#Add extra metrics
|
541
592
|
pos['RushF/60'] = (pos['RushF']/pos['TOI'])*60
|
542
593
|
pos['RushA/60'] = (pos['RushA']/pos['TOI'])*60
|
543
|
-
pos['
|
544
|
-
pos['
|
594
|
+
pos['RushesFF'] = pos['RushF/60'].rank(pct=True)
|
595
|
+
pos['RushesFA'] = 1 - pos['RushA/60'].rank(pct=True)
|
545
596
|
pos['RushFxG/60'] = (pos['RushFxG']/pos['TOI'])*60
|
546
597
|
pos['RushAxG/60'] = (pos['RushAxG']/pos['TOI'])*60
|
547
|
-
pos['
|
548
|
-
pos['
|
598
|
+
pos['RushesxGF'] = pos['RushFxG/60'].rank(pct=True)
|
599
|
+
pos['RushesxGA'] = 1 - pos['RushAxG/60'].rank(pct=True)
|
549
600
|
pos['RushFG/60'] = (pos['RushFG']/pos['TOI'])*60
|
550
601
|
pos['RushAG/60'] = (pos['RushAG']/pos['TOI'])*60
|
551
|
-
pos['
|
552
|
-
pos['
|
602
|
+
pos['RushesGF'] = pos['RushFG/60'].rank(pct=True)
|
603
|
+
pos['RushesGA'] = 1 - pos['RushAG/60'].rank(pct=True)
|
553
604
|
|
554
605
|
#Flip against metric percentiles
|
555
606
|
pos['ODEF-SR'] = 1-pos['ODEF-SR']
|
556
607
|
pos['ODEF-SQ'] = 1-pos['ODEF-SQ']
|
557
608
|
pos['ODEF-FN'] = 1-pos['ODEF-FN']
|
558
609
|
|
610
|
+
#Extraneous Values
|
611
|
+
pos['EGF'] = pos['OOFF-SRI']+pos['OOFF-SQI']+pos['OOFF-FNI']
|
612
|
+
pos['ExGF'] = pos['OOFF-SRI']+pos['OOFF-SQI']
|
613
|
+
pos['EGA'] = pos['ODEF-SRI']+pos['ODEF-SQI']+pos['ODEF-FNI']
|
614
|
+
pos['ExGA'] = pos['ODEF-SRI']+pos['ODEF-SQI']
|
615
|
+
|
616
|
+
#...and their percentiles
|
617
|
+
pos['EGF-P'] = pos['EGF'].rank(pct=True)
|
618
|
+
pos['ExGF-P'] = pos['ExGF'].rank(pct=True)
|
619
|
+
pos['EGA-P'] = pos['EGA'].rank(pct=True)
|
620
|
+
pos['ExGA-P'] = pos['ExGA'].rank(pct=True)
|
621
|
+
|
622
|
+
pos['EGA-P'] = 1-pos['EGA']
|
623
|
+
pos['ExGA-P'] = 1-pos['ExGA']
|
624
|
+
|
625
|
+
#...and then their totals
|
626
|
+
pos['EGF-T'] = (pos['EGF']/60)*pos['TOI']
|
627
|
+
pos['ExGF-T'] = (pos['ExGF']/60)*pos['TOI']
|
628
|
+
pos['EGA-T'] = (pos['EGA']/60)*pos['TOI']
|
629
|
+
pos['ExGA-T'] = (pos['ExGA']/60)*pos['TOI']
|
630
|
+
|
631
|
+
#Goal Composites...
|
632
|
+
pos['Team-Adjusted-EGI'] = pos['ODEF-FNI']-pos['ExGA']
|
633
|
+
pos['GISAx'] = pos['ExGA']-pos['EGA']
|
634
|
+
pos['NetGI'] = pos['EGF'] - pos['EGA']
|
635
|
+
pos['NetxGI'] = pos['ExGF'] - pos['ExGA']
|
636
|
+
|
637
|
+
#...and their percentiles
|
638
|
+
pos['Team-Adjusted-EGI-P'] = pos['Team-Adjusted-EGI'].rank(pct=True)
|
639
|
+
pos['GISAx-P'] = pos['GISAx'].rank(pct=True)
|
640
|
+
pos['NetGI-P'] = pos['NetGI'].rank(pct=True)
|
641
|
+
pos['NetxGI-P'] = pos['NetxGI'].rank(pct=True)
|
642
|
+
|
643
|
+
#...and then their totals
|
644
|
+
pos['Team-Adjusted-EGI-T'] = (pos['Team-Adjusted-EGI']/60)*pos['TOI']
|
645
|
+
pos['GISAx-T'] = (pos['GISAx']/60)*pos['TOI']
|
646
|
+
pos['NetGI-T'] = (pos['NetGI']/60)*pos['TOI']
|
647
|
+
pos['NetxGI-T'] = (pos['NetxGI']/60)*pos['TOI']
|
648
|
+
|
559
649
|
#Return: goalie stats with shooting impacts
|
560
|
-
return pos.drop(columns=['fsh','fenwick','xg_fen','xg','g','finishing']).sort_values(['Season','Team'])
|
650
|
+
return pos.drop(columns=['fsh','fenwick','xg_fen','xg','g','finishing']).sort_values(['Goalie','Season','Team'])
|
561
651
|
|
652
|
+
elif type =='team':
|
653
|
+
pos = agg
|
654
|
+
for group in [('OOFF','F'),('ODEF','A')]:
|
655
|
+
#Have to set these columns for compatibility with df.apply
|
656
|
+
pos['fsh'] = pos[f'Fsh{group[1]}%']
|
657
|
+
pos['fenwick'] = pos[f'F{group[1]}/60']
|
658
|
+
pos['xg'] = pos[f'xG{group[1]}/60']
|
659
|
+
pos['g'] = pos[f'G{group[1]}/60']
|
660
|
+
pos['xg_fen'] = pos[f'xG{group[1]}/F{group[1]}']
|
661
|
+
pos['finishing'] = pos[f'G{group[1]}/xG{group[1]}']
|
662
|
+
|
663
|
+
#Find average for position in frame
|
664
|
+
avg_fen = pos['fenwick'].mean()
|
665
|
+
avg_xg = pos['xg'].mean()
|
666
|
+
avg_g = pos['g'].mean()
|
667
|
+
avg_fsh = avg_g/avg_fen
|
668
|
+
avg_xg_fen = avg_xg/avg_fen
|
669
|
+
|
670
|
+
#Calculate composite percentiles
|
671
|
+
pos[f'{group[0]}-SR'] = pos['fenwick'].rank(pct=True)
|
672
|
+
pos[f'{group[0]}-SQ'] = pos['xg_fen'].rank(pct=True)
|
673
|
+
pos[f'{group[0]}-FN'] = pos['finishing'].rank(pct=True)
|
674
|
+
|
675
|
+
#Calculate shot rate, shot quality, and finishing impacts
|
676
|
+
pos[f'{group[0]}-SRI'] = pos['g'] - pos.apply(lambda x: goal_comp(avg_fen,x.xg_fen,x.xg,x.g,avg_fsh),axis=1)
|
677
|
+
pos[f'{group[0]}-SQI'] = pos['g'] - pos.apply(lambda x: goal_comp(x.fenwick,avg_xg_fen,x.xg,x.g,avg_fsh),axis=1)
|
678
|
+
pos[f'{group[0]}-FNI'] = pos['g'] - pos.apply(lambda x: goal_comp(x.fenwick,x.xg_fen,avg_xg,avg_g,avg_fsh),axis=1)
|
679
|
+
|
680
|
+
#Convert impacts to totals
|
681
|
+
#Scale per-60 shot rate, shot quality, and finishing impacts by TOI
|
682
|
+
pos[f'{group[0]}-SRI-T'] = (pos[f'{group[0]}-SRI']/60)*pos['TOI']
|
683
|
+
pos[f'{group[0]}-SQI-T'] = (pos[f'{group[0]}-SQI']/60)*pos['TOI']
|
684
|
+
pos[f'{group[0]}-FNI-T'] = (pos[f'{group[0]}-FNI']/60)*pos['TOI']
|
685
|
+
|
686
|
+
#Rank per 60 stats
|
687
|
+
for stat in per_sixty[10:len(per_sixty)]:
|
688
|
+
pos[f'{stat}/60-P'] = pos[f'{stat}/60'].rank(pct=True)
|
689
|
+
|
690
|
+
#Flip percentiles for against stats
|
691
|
+
for stat in ['FA','xGA','GA','CA','HA','Give','Penl','Penl2','Penl5']:
|
692
|
+
pos[f'{stat}/60-P'] = 1-pos[f'{stat}/60-P']
|
693
|
+
|
694
|
+
#Add extra metrics
|
695
|
+
pos['RushF/60'] = (pos['RushF']/pos['TOI'])*60
|
696
|
+
pos['RushA/60'] = (pos['RushA']/pos['TOI'])*60
|
697
|
+
pos['RushesFF'] = pos['RushF/60'].rank(pct=True)
|
698
|
+
pos['RushesFA'] = 1 - pos['RushA/60'].rank(pct=True)
|
699
|
+
pos['RushFxG/60'] = (pos['RushFxG']/pos['TOI'])*60
|
700
|
+
pos['RushAxG/60'] = (pos['RushAxG']/pos['TOI'])*60
|
701
|
+
pos['RushesxGF'] = pos['RushFxG/60'].rank(pct=True)
|
702
|
+
pos['RushesxGA'] = 1 - pos['RushAxG/60'].rank(pct=True)
|
703
|
+
pos['RushFG/60'] = (pos['RushFG']/pos['TOI'])*60
|
704
|
+
pos['RushAG/60'] = (pos['RushAG']/pos['TOI'])*60
|
705
|
+
pos['RushesGF'] = pos['RushFG/60'].rank(pct=True)
|
706
|
+
pos['RushesGA'] = 1 - pos['RushAG/60'].rank(pct=True)
|
707
|
+
|
708
|
+
#Flip against metric percentiles
|
709
|
+
pos['ODEF-SR'] = 1-pos['ODEF-SR']
|
710
|
+
pos['ODEF-SQ'] = 1-pos['ODEF-SQ']
|
711
|
+
pos['ODEF-FN'] = 1-pos['ODEF-FN']
|
712
|
+
|
713
|
+
pos['EGF'] = pos['OOFF-SRI']+pos['OOFF-SQI']+pos['OOFF-FNI']
|
714
|
+
pos['ExGF'] = pos['OOFF-SRI']+pos['OOFF-SQI']
|
715
|
+
pos['EGA'] = pos['ODEF-SRI']+pos['ODEF-SQI']+pos['ODEF-FNI']
|
716
|
+
pos['ExGA'] = pos['ODEF-SRI']+pos['ODEF-SQI']
|
717
|
+
|
718
|
+
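#...and their percentiles (NOTE(review): the EGA-P/ExGA-P flip below subtracts the raw EGA/ExGA values instead of the percentiles just computed; 1-pos['EGA-P'] and 1-pos['ExGA-P'] look intended)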
|
719
|
+
pos['EGF-P'] = pos['EGF'].rank(pct=True)
|
720
|
+
pos['ExGF-P'] = pos['ExGF'].rank(pct=True)
|
721
|
+
pos['EGA-P'] = pos['EGA'].rank(pct=True)
|
722
|
+
pos['ExGA-P'] = pos['ExGA'].rank(pct=True)
|
723
|
+
|
724
|
+
pos['EGA-P'] = 1-pos['EGA']
|
725
|
+
pos['ExGA-P'] = 1-pos['ExGA']
|
726
|
+
|
727
|
+
#...and then their totals
|
728
|
+
pos['EGF-T'] = (pos['EGF']/60)*pos['TOI']
|
729
|
+
pos['ExGF-T'] = (pos['ExGF']/60)*pos['TOI']
|
730
|
+
pos['EGA-T'] = (pos['EGA']/60)*pos['TOI']
|
731
|
+
pos['ExGA-T'] = (pos['ExGA']/60)*pos['TOI']
|
732
|
+
|
733
|
+
#Return: team stats with shooting impacts
|
734
|
+
return pos.drop(columns=['fsh','fenwick','xg_fen','xg','g','finishing']).sort_values(['Season','Team'])
|
562
735
|
|
563
736
|
else:
|
564
737
|
#Remove skaters with less than 150 minutes of TOI then split between forwards and dmen
|
565
|
-
|
566
|
-
forwards = agg.loc[agg['Position']!='D']
|
567
|
-
defensemen = agg.loc[agg['Position']=='D']
|
738
|
+
#These are added back in after the fact
|
739
|
+
forwards = agg.loc[(agg['Position']!='D')&(agg['TOI']>=150)]
|
740
|
+
defensemen = agg.loc[(agg['Position']=='D')&(agg['TOI']>=150)]
|
741
|
+
non_players = agg.loc[agg['TOI']<150]
|
568
742
|
|
569
743
|
#Loop through both positions, all groupings (INDV, OOFF, and ODEF) generating impacts
|
570
744
|
for pos in [forwards,defensemen]:
|
@@ -594,15 +768,29 @@ def nhl_shooting_impacts(agg,team=False):
|
|
594
768
|
pos[f'{group[0]}-SQI'] = pos['g'] - pos.apply(lambda x: goal_comp(x.fenwick,avg_xg_fen,x.xg,x.g,avg_fsh),axis=1)
|
595
769
|
pos[f'{group[0]}-FNI'] = pos['g'] - pos.apply(lambda x: goal_comp(x.fenwick,x.xg_fen,avg_xg,avg_g,avg_fsh),axis=1)
|
596
770
|
|
771
|
+
#Convert impacts to totals
|
772
|
+
#Scale per-60 shot rate, shot quality, and finishing impacts by TOI
|
773
|
+
pos[f'{group[0]}-SRI-T'] = (pos[f'{group[0]}-SRI']/60)*pos['TOI']
|
774
|
+
pos[f'{group[0]}-SQI-T'] = (pos[f'{group[0]}-SQI']/60)*pos['TOI']
|
775
|
+
pos[f'{group[0]}-FNI-T'] = (pos[f'{group[0]}-FNI']/60)*pos['TOI']
|
776
|
+
|
597
777
|
#Calculate On-Ice Involvement Percentiles
|
598
|
-
pos['
|
599
|
-
pos['
|
600
|
-
pos['
|
601
|
-
pos['
|
602
|
-
pos['
|
603
|
-
pos['
|
604
|
-
pos['
|
605
|
-
pos['
|
778
|
+
pos['Fi/F'] = pos['FC%'].rank(pct=True)
|
779
|
+
pos['xGi/F'] = pos['xGC%'].rank(pct=True)
|
780
|
+
pos['Pi/F'] = pos['GI%'].rank(pct=True)
|
781
|
+
pos['Gi/F'] = pos['GC%'].rank(pct=True)
|
782
|
+
pos['RushFi/60'] = (pos['Rush']/pos['TOI'])*60
|
783
|
+
pos['RushxGi/60'] = (pos['Rush xG']/pos['TOI'])*60
|
784
|
+
pos['RushesxGi'] = pos['RushxGi/60'].rank(pct=True)
|
785
|
+
pos['RushesFi'] = pos['RushFi/60'].rank(pct=True)
|
786
|
+
|
787
|
+
#Rank per 60 stats
|
788
|
+
for stat in per_sixty:
|
789
|
+
pos[f'{stat}/60-P'] = pos[f'{stat}/60'].rank(pct=True)
|
790
|
+
|
791
|
+
#Flip percentiles for against stats
|
792
|
+
for stat in ['FA','xGA','GA','CA','HA','Give','Penl','Penl2','Penl5']:
|
793
|
+
pos[f'{stat}/60-P'] = 1-pos[f'{stat}/60-P']
|
606
794
|
|
607
795
|
#Add positions back together
|
608
796
|
complete = pd.concat([forwards,defensemen])
|
@@ -613,108 +801,253 @@ def nhl_shooting_impacts(agg,team=False):
|
|
613
801
|
complete['ODEF-FN'] = 1-complete['ODEF-FN']
|
614
802
|
|
615
803
|
#Extraneous Values
|
616
|
-
complete['
|
617
|
-
complete['
|
618
|
-
complete['
|
619
|
-
complete['
|
620
|
-
complete['
|
621
|
-
complete['
|
622
|
-
|
623
|
-
|
624
|
-
complete['
|
625
|
-
complete['
|
626
|
-
complete['
|
627
|
-
complete['
|
628
|
-
complete['
|
629
|
-
complete['
|
630
|
-
|
804
|
+
complete['EGi'] = complete['INDV-SRI']+complete['INDV-SQI']+complete['INDV-FNI']
|
805
|
+
complete['ExGi'] = complete['INDV-SRI']+complete['INDV-SQI']
|
806
|
+
complete['EGF'] = complete['OOFF-SRI']+complete['OOFF-SQI']+complete['OOFF-FNI']
|
807
|
+
complete['ExGF'] = complete['OOFF-SRI']+complete['OOFF-SQI']
|
808
|
+
complete['EGA'] = complete['ODEF-SRI']+complete['ODEF-SQI']+complete['ODEF-FNI']
|
809
|
+
complete['ExGA'] = complete['ODEF-SRI']+complete['ODEF-SQI']
|
810
|
+
|
811
|
+
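#...and their percentiles (NOTE(review): the EGA-P/ExGA-P flip below subtracts the raw EGA/ExGA values instead of the percentiles just computed; 1-complete['EGA-P'] and 1-complete['ExGA-P'] look intended)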
|
812
|
+
complete['EGi-P'] = complete['EGi'].rank(pct=True)
|
813
|
+
complete['ExGi-P'] = complete['ExGi'].rank(pct=True)
|
814
|
+
complete['EGF-P'] = complete['EGF'].rank(pct=True)
|
815
|
+
complete['ExGF-P'] = complete['ExGF'].rank(pct=True)
|
816
|
+
complete['EGA-P'] = complete['EGA'].rank(pct=True)
|
817
|
+
complete['ExGA-P'] = complete['ExGA'].rank(pct=True)
|
818
|
+
|
819
|
+
complete['EGA-P'] = 1-complete['EGA']
|
820
|
+
complete['ExGA-P'] = 1-complete['ExGA']
|
821
|
+
|
822
|
+
#...and then their totals
|
823
|
+
complete['EGi-T'] = (complete['EGi']/60)*complete['TOI']
|
824
|
+
complete['ExGi-T'] = (complete['ExGi']/60)*complete['TOI']
|
825
|
+
complete['EGF-T'] = (complete['EGF']/60)*complete['TOI']
|
826
|
+
complete['ExGF-T'] = (complete['ExGF']/60)*complete['TOI']
|
827
|
+
complete['EGA-T'] = (complete['EGA']/60)*complete['TOI']
|
828
|
+
complete['ExGA-T'] = (complete['ExGA']/60)*complete['TOI']
|
829
|
+
|
830
|
+
#Goal Composites...
|
831
|
+
complete['LiEG'] = complete['EGF'] - complete['EGi']
|
832
|
+
complete['LiExG'] = complete['ExGF'] - complete['ExGi']
|
833
|
+
complete['LiGIn'] = complete['LiEG']*complete['AC%']
|
834
|
+
complete['LixGIn'] = complete['LiExG']*complete['AC%']
|
835
|
+
complete['ALiGIn'] = complete['LiGIn']-complete['LixGIn']
|
836
|
+
complete['CompGI'] = complete['EGi'] + complete['LiGIn']
|
837
|
+
complete['LiRelGI'] = complete['CompGI'] - (complete['EGF']-complete['CompGI'])
|
838
|
+
complete['NetGI'] = complete['EGF'] - complete['EGA']
|
839
|
+
complete['NetxGI'] = complete['ExGF'] - complete['ExGA']
|
840
|
+
|
841
|
+
#...and their percentiles
|
842
|
+
complete['LiEG-P'] = complete['LiEG'].rank(pct=True)
|
843
|
+
complete['LiExG-P'] = complete['LiExG'].rank(pct=True)
|
844
|
+
complete['LiGIn-P'] = complete['LiGIn'].rank(pct=True)
|
845
|
+
complete['LixGIn-P'] = complete['LixGIn'].rank(pct=True)
|
846
|
+
complete['ALiGIn-P'] = complete['ALiGIn'].rank(pct=True)
|
847
|
+
complete['CompGI-P'] = complete['CompGI'].rank(pct=True)
|
848
|
+
complete['LiRelGI-P'] = complete['LiRelGI'].rank(pct=True)
|
849
|
+
complete['NetGI-P'] = complete['NetGI'].rank(pct=True)
|
850
|
+
complete['NetxGI-P'] = complete['NetxGI'].rank(pct=True)
|
851
|
+
|
852
|
+
#...and then their totals
|
853
|
+
complete['LiEG-T'] = (complete['LiEG']/60)*complete['TOI']
|
854
|
+
complete['LiExG-T'] = (complete['LiExG']/60)*complete['TOI']
|
855
|
+
complete['LiGIn-T'] = (complete['LiGIn']/60)*complete['TOI']
|
856
|
+
complete['LixGIn-T'] = (complete['LixGIn']/60)*complete['TOI']
|
857
|
+
complete['ALiGIn-T'] = (complete['ALiGIn']/60)*complete['TOI']
|
858
|
+
complete['CompGI-T'] = (complete['CompGI']/60)*complete['TOI']
|
859
|
+
complete['LiRelGI-T'] = (complete['LiRelGI']/60)*complete['TOI']
|
860
|
+
complete['NetGI-T'] = (complete['NetGI']/60)*complete['TOI']
|
861
|
+
complete['NetxGI-T'] = (complete['NetxGI']/60)*complete['TOI']
|
862
|
+
|
863
|
+
#Add back skaters with less than 150 minutes TOI
|
864
|
+
df = pd.concat([complete,non_players]).drop(columns=['fsh','fenwick','xg_fen','xg','g','finishing']).sort_values(['Player','Season','Team','ID'])
|
631
865
|
#Return: skater stats with shooting impacts
|
632
|
-
return
|
866
|
+
return df
|
633
867
|
|
634
|
-
def nhl_calculate_stats(pbp,type,season_types,game_strength,
|
868
|
+
def nhl_calculate_stats(pbp,type,season_types,game_strength,split_game=False,roster_path=default_roster,shot_impact=False):
|
635
869
|
#Given play-by-play, seasonal information, game_strength, rosters, and xG model, return aggregated stats
|
636
870
|
# param 'pbp' - play-by-play dataframe
|
637
|
-
# param 'type' - type of stats to calculate ('skater', '
|
871
|
+
# param 'type' - type of stats to calculate ('skater', 'goalie', or 'team')
|
638
872
|
# param 'season' - season or timeframe of events in play-by-play
|
639
873
|
# param 'season_types' - list of season types (preseason, regular season, or playoffs) to include in aggregation
|
640
874
|
# param 'game_strength' - list of game_strengths to include in aggregation
|
875
|
+
# param 'split_game' - boolean which if true groups aggregation by game
|
641
876
|
# param 'roster_path' - path to roster file
|
642
|
-
# param 'xg' - xG model to apply to pbp for aggregation
|
643
877
|
# param 'shot_impact' - boolean determining if the shot impact model will be applied to the dataset
|
644
878
|
|
645
|
-
print(f"Calculating statistics for all games in the provided play-by-play data for {type}s...\nSeasons included: {pbp['season'].drop_duplicates().to_list()}...")
|
879
|
+
print(f"Calculating statistics for all games in the provided play-by-play data at {game_strength} for {type}s...\nSeasons included: {pbp['season'].drop_duplicates().to_list()}...")
|
646
880
|
start = time.perf_counter()
|
647
881
|
|
648
|
-
#Add extra data and apply team changes
|
649
|
-
pbp = prep_xG_data(pbp).replace(convert_team_abbr)
|
650
|
-
|
651
882
|
#Check if xG column exists and apply model if it does not
|
652
883
|
try:
|
653
884
|
pbp['xG']
|
654
|
-
except KeyError:
|
655
|
-
|
656
|
-
pbp = wsba_xG(pbp)
|
657
|
-
else:
|
658
|
-
pbp = moneypuck_xG(pbp)
|
885
|
+
except KeyError:
|
886
|
+
pbp = wsba_xG(pbp)
|
659
887
|
|
660
|
-
#Filter by season types and remove
|
661
|
-
|
888
|
+
#Filter by season types, remove shootouts, remove shots with no coordinates, and remove shots on empty nets
|
889
|
+
pbp_noshot = pbp.loc[(pbp['season_type'].isin(season_types)) & ~(pbp['event_type'].isin(fenwick_events))]
|
890
|
+
|
891
|
+
#Include everything when strengths is set to 'all'
|
892
|
+
if game_strength == 'all':
|
893
|
+
mask = ((pbp['event_type'].isin(fenwick_events)) & (pbp['empty_net']<1))
|
894
|
+
else:
|
895
|
+
mask = ((pbp['event_type'].isin(fenwick_events)) & (pbp['empty_net']<1) & (pbp['x'].notna()) & (pbp['y'].notna()))
|
662
896
|
|
897
|
+
pbp_shot = pbp.loc[(pbp['season_type'].isin(season_types)) & mask]
|
898
|
+
|
899
|
+
pbp = pd.concat([pbp_shot,pbp_noshot])
|
900
|
+
|
663
901
|
#Convert all columns with player ids to float in order to avoid merging errors
|
664
902
|
for col in get_col():
|
665
903
|
if "_id" in col:
|
666
904
|
try: pbp[col] = pbp[col].astype(float)
|
667
905
|
except KeyError: continue
|
668
906
|
|
669
|
-
#
|
670
|
-
if
|
671
|
-
|
907
|
+
#Split by game if specified
|
908
|
+
if split_game:
|
909
|
+
second_group = ['season','game_id']
|
910
|
+
else:
|
911
|
+
second_group = ['season']
|
672
912
|
|
673
913
|
#Split calculation
|
674
|
-
if type == '
|
675
|
-
complete =
|
676
|
-
|
677
|
-
#WSBA
|
678
|
-
complete['WSBA'] = complete['Team']+complete['Season'].astype(str)
|
914
|
+
if type == 'goalie':
|
915
|
+
complete = calc_goalie(pbp,game_strength,second_group)
|
679
916
|
|
680
917
|
#Set TOI to minutes
|
681
918
|
complete['TOI'] = complete['TOI']/60
|
682
919
|
|
683
920
|
#Add per 60 stats
|
684
|
-
for stat in
|
921
|
+
for stat in ['FF','FA','xGF','xGA','GF','GA','CF','CA','GSAx']:
|
685
922
|
complete[f'{stat}/60'] = (complete[stat]/complete['TOI'])*60
|
923
|
+
|
924
|
+
complete['GF%'] = complete['GF']/(complete['GF']+complete['GA'])
|
925
|
+
complete['xGF%'] = complete['xGF']/(complete['xGF']+complete['xGA'])
|
926
|
+
complete['FF%'] = complete['FF']/(complete['FF']+complete['FA'])
|
927
|
+
complete['CF%'] = complete['CF']/(complete['CF']+complete['CA'])
|
686
928
|
|
687
|
-
#
|
688
|
-
|
689
|
-
complete[f'{stat}/60 Percentile'] = complete[f'{stat}/60'].rank(pct=True)
|
929
|
+
#Remove entries with no ID listed
|
930
|
+
complete = complete.loc[complete['ID'].notna()]
|
690
931
|
|
691
|
-
#
|
692
|
-
|
693
|
-
|
932
|
+
#Import rosters and player info
|
933
|
+
rosters = pd.read_csv(roster_path)
|
934
|
+
names = rosters[['id','fullName',
|
935
|
+
'headshot','positionCode','shootsCatches',
|
936
|
+
'heightInInches','weightInPounds',
|
937
|
+
'birthDate','birthCountry']].drop_duplicates(subset=['id','fullName'],keep='last')
|
938
|
+
|
939
|
+
#Add names
|
940
|
+
complete = pd.merge(complete,names,how='left',left_on='ID',right_on='id')
|
941
|
+
|
942
|
+
#Rename roster columns to their output names
|
943
|
+
complete = complete.rename(columns={'fullName':'Goalie',
|
944
|
+
'headshot':'Headshot',
|
945
|
+
'positionCode':'Position',
|
946
|
+
'shootsCatches':'Handedness',
|
947
|
+
'heightInInches':'Height (in)',
|
948
|
+
'weightInPounds':'Weight (lbs)',
|
949
|
+
'birthDate':'Birthday',
|
950
|
+
'birthCountry':'Nationality'})
|
951
|
+
|
952
|
+
#WSBA
|
953
|
+
complete['WSBA'] = complete['Goalie']+complete['Team']+complete['Season'].astype(str)
|
954
|
+
|
955
|
+
#Add player age
|
956
|
+
complete['Birthday'] = pd.to_datetime(complete['Birthday'])
|
957
|
+
complete['season_year'] = complete['Season'].astype(str).str[4:8].astype(int)
|
958
|
+
complete['Age'] = complete['season_year'] - complete['Birthday'].dt.year
|
959
|
+
|
960
|
+
#Find player headshot
|
961
|
+
complete['Headshot'] = 'https://assets.nhle.com/mugs/nhl/'+complete['Season'].astype(str)+'/'+complete['Team']+'/'+complete['ID'].astype(int).astype(str)+'.png'
|
694
962
|
|
695
963
|
end = time.perf_counter()
|
696
964
|
length = end-start
|
697
965
|
print(f'...finished in {(length if length <60 else length/60):.2f} {'seconds' if length <60 else 'minutes'}.')
|
698
|
-
|
966
|
+
|
967
|
+
head = ['Goalie','ID','Game'] if 'Game' in complete.columns else ['Goalie','ID']
|
968
|
+
complete = complete[head+[
|
969
|
+
"Season","Team",'WSBA',
|
970
|
+
'Headshot','Position','Handedness',
|
971
|
+
'Height (in)','Weight (lbs)',
|
972
|
+
'Birthday','Age','Nationality',
|
973
|
+
'GP','TOI',
|
974
|
+
"GF","FF","xGF","xGF/FF","GF/xGF","FshF%",
|
975
|
+
"GA","FA","xGA","xGA/FA","GA/xGA","FshA%",
|
976
|
+
'CF','CA',
|
977
|
+
'GSAx',
|
978
|
+
'RushF','RushA','RushFxG','RushAxG','RushFG','RushAG'
|
979
|
+
]+[f'{stat}/60' for stat in ['FF','FA','xGF','xGA','GF','GA','CF','CA','GSAx']]]
|
980
|
+
|
981
|
+
#Apply shot impacts if necessary
|
699
982
|
if shot_impact:
|
700
|
-
|
701
|
-
|
702
|
-
|
983
|
+
complete = nhl_shooting_impacts(complete,'goalie')
|
984
|
+
|
985
|
+
end = time.perf_counter()
|
986
|
+
length = end-start
|
987
|
+
print(f'...finished in {(length if length <60 else length/60):.2f} {'seconds' if length <60 else 'minutes'}.')
|
988
|
+
|
989
|
+
return complete
|
990
|
+
|
991
|
+
elif type == 'team':
|
992
|
+
complete = calc_team(pbp,game_strength,second_group)
|
993
|
+
|
994
|
+
#WSBA
|
995
|
+
complete['WSBA'] = complete['Team']+complete['Season'].astype(str)
|
996
|
+
|
997
|
+
#Set TOI to minutes
|
998
|
+
complete['TOI'] = complete['TOI']/60
|
999
|
+
|
1000
|
+
#Add per 60 stats
|
1001
|
+
for stat in per_sixty[10:len(per_sixty)]:
|
1002
|
+
complete[f'{stat}/60'] = (complete[stat]/complete['TOI'])*60
|
1003
|
+
|
1004
|
+
complete['GF%'] = complete['GF']/(complete['GF']+complete['GA'])
|
1005
|
+
complete['xGF%'] = complete['xGF']/(complete['xGF']+complete['xGA'])
|
1006
|
+
complete['FF%'] = complete['FF']/(complete['FF']+complete['FA'])
|
1007
|
+
complete['CF%'] = complete['CF']/(complete['CF']+complete['CA'])
|
1008
|
+
|
1009
|
+
head = ['Team','Game'] if 'Game' in complete.columns else ['Team']
|
1010
|
+
complete = complete[head+[
|
1011
|
+
'Season','WSBA',
|
1012
|
+
'GP','TOI',
|
1013
|
+
"GF","FF","xGF","xGF/FF","GF/xGF","FshF%",
|
1014
|
+
"GA","FA","xGA","xGA/FA","GA/xGA","FshA%",
|
1015
|
+
'CF','CA',
|
1016
|
+
'GF%','FF%','xGF%','CF%',
|
1017
|
+
'HF','HA','HF%',
|
1018
|
+
'Penl','Penl2','Penl5','PIM','Draw','PENL%',
|
1019
|
+
'Give','Take','PM%',
|
1020
|
+
'Block',
|
1021
|
+
'RushF','RushA','RushFxG','RushAxG','RushFG','RushAG'
|
1022
|
+
]+[f'{stat}/60' for stat in per_sixty[10:len(per_sixty)]]]
|
1023
|
+
#Apply shot impacts if necessary
|
1024
|
+
if shot_impact:
|
1025
|
+
complete = nhl_shooting_impacts(complete,'team')
|
1026
|
+
|
1027
|
+
end = time.perf_counter()
|
1028
|
+
length = end-start
|
1029
|
+
print(f'...finished in {(length if length <60 else length/60):.2f} {'seconds' if length <60 else 'minutes'}.')
|
1030
|
+
|
1031
|
+
return complete
|
703
1032
|
else:
|
704
|
-
indv_stats = calc_indv(pbp)
|
705
|
-
onice_stats = calc_onice(pbp)
|
1033
|
+
indv_stats = calc_indv(pbp,game_strength,second_group)
|
1034
|
+
onice_stats = calc_onice(pbp,game_strength,second_group)
|
706
1035
|
|
707
1036
|
#IDs sometimes set as objects
|
708
1037
|
indv_stats['ID'] = indv_stats['ID'].astype(float)
|
709
1038
|
onice_stats['ID'] = onice_stats['ID'].astype(float)
|
710
1039
|
|
711
1040
|
#Merge and add columns for extra stats
|
712
|
-
complete = pd.merge(indv_stats,onice_stats,how="outer",on=['ID','Team','Season'])
|
1041
|
+
complete = pd.merge(indv_stats,onice_stats,how="outer",on=['ID','Team','Season']+(['Game'] if 'game_id' in second_group else []))
|
713
1042
|
complete['GC%'] = complete['Gi']/complete['GF']
|
714
1043
|
complete['AC%'] = (complete['A1']+complete['A2'])/complete['GF']
|
715
1044
|
complete['GI%'] = (complete['Gi']+complete['A1']+complete['A2'])/complete['GF']
|
716
1045
|
complete['FC%'] = complete['Fi']/complete['FF']
|
717
1046
|
complete['xGC%'] = complete['xGi']/complete['xGF']
|
1047
|
+
complete['GF%'] = complete['GF']/(complete['GF']+complete['GA'])
|
1048
|
+
complete['xGF%'] = complete['xGF']/(complete['xGF']+complete['xGA'])
|
1049
|
+
complete['FF%'] = complete['FF']/(complete['FF']+complete['FA'])
|
1050
|
+
complete['CF%'] = complete['CF']/(complete['CF']+complete['CA'])
|
718
1051
|
|
719
1052
|
#Remove entries with no ID listed
|
720
1053
|
complete = complete.loc[complete['ID'].notna()]
|
@@ -742,18 +1075,6 @@ def nhl_calculate_stats(pbp,type,season_types,game_strength,roster_path="rosters
|
|
742
1075
|
#Set TOI to minutes
|
743
1076
|
complete['TOI'] = complete['TOI']/60
|
744
1077
|
|
745
|
-
#Add per 60 stats
|
746
|
-
for stat in per_sixty:
|
747
|
-
complete[f'{stat}/60'] = (complete[stat]/complete['TOI'])*60
|
748
|
-
|
749
|
-
#Rank per 60 stats
|
750
|
-
for stat in per_sixty:
|
751
|
-
complete[f'{stat}/60 Percentile'] = complete[f'{stat}/60'].rank(pct=True)
|
752
|
-
|
753
|
-
#Flip percentiles for against stats
|
754
|
-
for stat in ['FA','xGA','GA']:
|
755
|
-
complete[f'{stat}/60 Percentile'] = 1-complete[f'{stat}/60 Percentile']
|
756
|
-
|
757
1078
|
#Add player age
|
758
1079
|
complete['Birthday'] = pd.to_datetime(complete['Birthday'])
|
759
1080
|
complete['season_year'] = complete['Season'].astype(str).str[4:8].astype(int)
|
@@ -762,42 +1083,56 @@ def nhl_calculate_stats(pbp,type,season_types,game_strength,roster_path="rosters
|
|
762
1083
|
#Find player headshot
|
763
1084
|
complete['Headshot'] = 'https://assets.nhle.com/mugs/nhl/'+complete['Season'].astype(str)+'/'+complete['Team']+'/'+complete['ID'].astype(int).astype(str)+'.png'
|
764
1085
|
|
765
|
-
end = time.perf_counter()
|
766
|
-
length = end-start
|
767
1086
|
#Remove goalies that occasionally appear in a set
|
768
1087
|
complete = complete.loc[complete['Position']!='G']
|
769
1088
|
#Add WSBA ID
|
770
1089
|
complete['WSBA'] = complete['Player']+complete['Season'].astype(str)+complete['Team']
|
771
1090
|
|
1091
|
+
#Add per 60 stats
|
1092
|
+
for stat in per_sixty:
|
1093
|
+
complete[f'{stat}/60'] = (complete[stat]/complete['TOI'])*60
|
1094
|
+
|
772
1095
|
#Shot Type Metrics
|
773
1096
|
type_metrics = []
|
774
1097
|
for type in shot_types:
|
775
1098
|
for stat in per_sixty[:3]:
|
776
1099
|
type_metrics.append(f'{type.capitalize()}{stat}')
|
777
1100
|
|
778
|
-
|
779
|
-
|
1101
|
+
head = ['Player','ID','Game'] if 'Game' in complete.columns else ['Player','ID']
|
1102
|
+
complete = complete[head+[
|
780
1103
|
"Season","Team",'WSBA',
|
781
1104
|
'Headshot','Position','Handedness',
|
782
1105
|
'Height (in)','Weight (lbs)',
|
783
1106
|
'Birthday','Age','Nationality',
|
784
1107
|
'GP','TOI',
|
785
1108
|
"Gi","A1","A2",'P1','P',
|
1109
|
+
'Give','Take','PM%','HF','HA','HF%',
|
786
1110
|
"Fi","xGi",'xGi/Fi',"Gi/xGi","Fshi%",
|
787
1111
|
"GF","FF","xGF","xGF/FF","GF/xGF","FshF%",
|
788
1112
|
"GA","FA","xGA","xGA/FA","GA/xGA","FshA%",
|
1113
|
+
'Ci','CF','CA','CF%',
|
1114
|
+
'FF%','xGF%','GF%',
|
789
1115
|
'Rush',"Rush xG",'Rush G',"GC%","AC%","GI%","FC%","xGC%",
|
790
|
-
|
1116
|
+
'F','FW','FL','F%',
|
1117
|
+
'Penl','Penl2','Penl5',
|
1118
|
+
'Draw','PIM','PENL%',
|
1119
|
+
'Block',
|
1120
|
+
'OZF','NZF','DZF',
|
1121
|
+
'OZF%','NZF%','DZF%',
|
1122
|
+
]+[f'{stat}/60' for stat in per_sixty]+type_metrics].fillna(0).sort_values(['Player','Season','Team','ID'])
|
791
1123
|
|
792
|
-
print(f'...finished in {(length if length <60 else length/60):.2f} {'seconds' if length <60 else 'minutes'}.')
|
793
1124
|
#Apply shot impacts if necessary (Note: skaters with fewer than 150 minutes of TOI are left out of the impact calculations and added back without impact values)
|
794
1125
|
if shot_impact:
|
795
|
-
|
796
|
-
|
797
|
-
|
1126
|
+
complete = nhl_shooting_impacts(complete,'skater')
|
1127
|
+
|
1128
|
+
end = time.perf_counter()
|
1129
|
+
length = end-start
|
1130
|
+
print(f'...finished in {(length if length <60 else length/60):.2f} {'seconds' if length <60 else 'minutes'}.')
|
798
1131
|
|
799
|
-
|
800
|
-
|
1132
|
+
return complete
|
1133
|
+
|
1134
|
+
def nhl_plot_skaters_shots(pbp,skater_dict,strengths,marker_dict=event_markers,onice = 'indv',title = True,legend=False):
|
1135
|
+
#Returns dict of plots for specified skaters
|
801
1136
|
# param 'pbp' - pbp to plot data
|
802
1137
|
# param 'skater_dict' - skaters to plot shots for (format: {'Patrice Bergeron':['20242025','BOS']})
|
803
1138
|
# param 'strengths' - strengths to include in plotting
|
@@ -809,18 +1144,19 @@ def nhl_plot_skaters_shots(pbp,skater_dict,strengths,marker_dict=event_markers,o
|
|
809
1144
|
|
810
1145
|
print(f'Plotting the following skater shots: {skater_dict}...')
|
811
1146
|
|
812
|
-
#Iterate through
|
813
|
-
skater_plots =
|
1147
|
+
#Iterate through skaters, adding plots to dict
|
1148
|
+
skater_plots = {}
|
814
1149
|
for skater in skater_dict.keys():
|
815
1150
|
skater_info = skater_dict[skater]
|
816
1151
|
title = f'{skater} Fenwick Shots for {skater_info[1]} in {skater_info[0][2:4]}-{skater_info[0][6:8]}' if title else ''
|
817
|
-
|
1152
|
+
#Key is formatted as PLAYERSEASONTEAM (i.e. PATRICE BERGERON20212022BOS)
|
1153
|
+
skater_plots.update({f'{skater}{skater_info[0]}{skater_info[1]}':[plot_skater_shots(pbp,skater,skater_info[0],skater_info[1],strengths,title,marker_dict,onice,legend)]})
|
818
1154
|
|
819
1155
|
#Return: dict of plotted skater shot charts
|
820
1156
|
return skater_plots
|
821
1157
|
|
822
|
-
def nhl_plot_games(pbp,events,strengths,game_ids='all',marker_dict=event_markers,team_colors={'away':'primary','home':'primary'},legend=False
|
823
|
-
#Returns
|
1158
|
+
def nhl_plot_games(pbp,events,strengths,game_ids='all',marker_dict=event_markers,team_colors={'away':'primary','home':'primary'},legend=False):
|
1159
|
+
#Returns dict of plots for specified games
|
824
1160
|
# param 'pbp' - pbp to plot data
|
825
1161
|
# param 'events' - type of events to plot
|
826
1162
|
# param 'strengths' - strengths to include in plotting
|
@@ -835,8 +1171,10 @@ def nhl_plot_games(pbp,events,strengths,game_ids='all',marker_dict=event_markers
|
|
835
1171
|
|
836
1172
|
print(f'Plotting the following games: {game_ids}...')
|
837
1173
|
|
838
|
-
|
839
|
-
|
1174
|
+
game_plots = {}
|
1175
|
+
#Iterate through games, adding plot to dict
|
1176
|
+
for game in game_ids:
|
1177
|
+
game_plots.update({game:[plot_game_events(pbp,game,events,strengths,marker_dict,team_colors,legend)]})
|
840
1178
|
|
841
1179
|
#Return: dict of plotted game events
|
842
1180
|
return game_plots
|
@@ -845,7 +1183,7 @@ def repo_load_rosters(seasons = []):
|
|
845
1183
|
#Returns roster data from repository
|
846
1184
|
# param 'seasons' - list of seasons to include
|
847
1185
|
|
848
|
-
data = pd.read_csv(
|
1186
|
+
data = pd.read_csv(default_roster)
|
849
1187
|
if len(seasons)>0:
|
850
1188
|
data = data.loc[data['season'].isin(seasons)]
|
851
1189
|
|
@@ -855,7 +1193,7 @@ def repo_load_schedule(seasons = []):
|
|
855
1193
|
#Returns schedule data from repository
|
856
1194
|
# param 'seasons' - list of seasons to include
|
857
1195
|
|
858
|
-
data = pd.read_csv(
|
1196
|
+
data = pd.read_csv(schedule_path)
|
859
1197
|
if len(seasons)>0:
|
860
1198
|
data = data.loc[data['season'].isin(seasons)]
|
861
1199
|
|
@@ -864,7 +1202,7 @@ def repo_load_schedule(seasons = []):
|
|
864
1202
|
def repo_load_teaminfo():
|
865
1203
|
#Returns team data from repository
|
866
1204
|
|
867
|
-
return pd.read_csv(
|
1205
|
+
return pd.read_csv(info_path)
|
868
1206
|
|
869
1207
|
def repo_load_pbp(seasons = []):
|
870
1208
|
#Returns play-by-play data from repository
|
@@ -872,7 +1210,7 @@ def repo_load_pbp(seasons = []):
|
|
872
1210
|
|
873
1211
|
#Add parquet to total
|
874
1212
|
print(f'Loading play-by-play from the following seasons: {seasons}...')
|
875
|
-
dfs = [pd.read_parquet(f"https://
|
1213
|
+
dfs = [pd.read_parquet(f"https://f005.backblazeb2.com/file/weakside-breakout/pbp/{season}.parquet") for season in seasons]
|
876
1214
|
|
877
1215
|
return pd.concat(dfs)
|
878
1216
|
|
@@ -880,9 +1218,3 @@ def repo_load_seasons():
|
|
880
1218
|
#List of available seasons to scrape
|
881
1219
|
|
882
1220
|
return seasons
|
883
|
-
|
884
|
-
def admin_convert_to_parquet(seasons):
|
885
|
-
for season in seasons:
|
886
|
-
load = pd.read_csv(f'pbp/csv/nhl_pbp_{season}.csv')
|
887
|
-
|
888
|
-
load.to_parquet(f'pbp/parquet/nhl_pbp_{season}.parquet',index=False)
|