wsba-hockey 1.0.3__py3-none-any.whl → 1.0.4__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- wsba_hockey/data_pipelines.py +183 -0
- wsba_hockey/evidence/weakside-breakout/node_modules/duckdb/vendor.py +146 -0
- wsba_hockey/evidence/weakside-breakout/node_modules/flatted/python/flatted.py +149 -0
- wsba_hockey/evidence/weakside-breakout/node_modules/flatted/python/test.py +63 -0
- wsba_hockey/evidence/weakside-breakout/node_modules/node-gyp/gyp/gyp_main.py +45 -0
- wsba_hockey/evidence/weakside-breakout/node_modules/node-gyp/gyp/pylib/gyp/MSVSNew.py +367 -0
- wsba_hockey/evidence/weakside-breakout/node_modules/node-gyp/gyp/pylib/gyp/MSVSProject.py +206 -0
- wsba_hockey/evidence/weakside-breakout/node_modules/node-gyp/gyp/pylib/gyp/MSVSSettings.py +1270 -0
- wsba_hockey/evidence/weakside-breakout/node_modules/node-gyp/gyp/pylib/gyp/MSVSSettings_test.py +1547 -0
- wsba_hockey/evidence/weakside-breakout/node_modules/node-gyp/gyp/pylib/gyp/MSVSToolFile.py +59 -0
- wsba_hockey/evidence/weakside-breakout/node_modules/node-gyp/gyp/pylib/gyp/MSVSUserFile.py +153 -0
- wsba_hockey/evidence/weakside-breakout/node_modules/node-gyp/gyp/pylib/gyp/MSVSUtil.py +271 -0
- wsba_hockey/evidence/weakside-breakout/node_modules/node-gyp/gyp/pylib/gyp/MSVSVersion.py +574 -0
- wsba_hockey/evidence/weakside-breakout/node_modules/node-gyp/gyp/pylib/gyp/__init__.py +690 -0
- wsba_hockey/evidence/weakside-breakout/node_modules/node-gyp/gyp/pylib/gyp/common.py +661 -0
- wsba_hockey/evidence/weakside-breakout/node_modules/node-gyp/gyp/pylib/gyp/common_test.py +78 -0
- wsba_hockey/evidence/weakside-breakout/node_modules/node-gyp/gyp/pylib/gyp/easy_xml.py +165 -0
- wsba_hockey/evidence/weakside-breakout/node_modules/node-gyp/gyp/pylib/gyp/easy_xml_test.py +109 -0
- wsba_hockey/evidence/weakside-breakout/node_modules/node-gyp/gyp/pylib/gyp/flock_tool.py +55 -0
- wsba_hockey/evidence/weakside-breakout/node_modules/node-gyp/gyp/pylib/gyp/generator/__init__.py +0 -0
- wsba_hockey/evidence/weakside-breakout/node_modules/node-gyp/gyp/pylib/gyp/generator/analyzer.py +808 -0
- wsba_hockey/evidence/weakside-breakout/node_modules/node-gyp/gyp/pylib/gyp/generator/android.py +1173 -0
- wsba_hockey/evidence/weakside-breakout/node_modules/node-gyp/gyp/pylib/gyp/generator/cmake.py +1321 -0
- wsba_hockey/evidence/weakside-breakout/node_modules/node-gyp/gyp/pylib/gyp/generator/compile_commands_json.py +120 -0
- wsba_hockey/evidence/weakside-breakout/node_modules/node-gyp/gyp/pylib/gyp/generator/dump_dependency_json.py +103 -0
- wsba_hockey/evidence/weakside-breakout/node_modules/node-gyp/gyp/pylib/gyp/generator/eclipse.py +464 -0
- wsba_hockey/evidence/weakside-breakout/node_modules/node-gyp/gyp/pylib/gyp/generator/gypd.py +89 -0
- wsba_hockey/evidence/weakside-breakout/node_modules/node-gyp/gyp/pylib/gyp/generator/gypsh.py +58 -0
- wsba_hockey/evidence/weakside-breakout/node_modules/node-gyp/gyp/pylib/gyp/generator/make.py +2714 -0
- wsba_hockey/evidence/weakside-breakout/node_modules/node-gyp/gyp/pylib/gyp/generator/msvs.py +3981 -0
- wsba_hockey/evidence/weakside-breakout/node_modules/node-gyp/gyp/pylib/gyp/generator/msvs_test.py +44 -0
- wsba_hockey/evidence/weakside-breakout/node_modules/node-gyp/gyp/pylib/gyp/generator/ninja.py +2936 -0
- wsba_hockey/evidence/weakside-breakout/node_modules/node-gyp/gyp/pylib/gyp/generator/ninja_test.py +55 -0
- wsba_hockey/evidence/weakside-breakout/node_modules/node-gyp/gyp/pylib/gyp/generator/xcode.py +1394 -0
- wsba_hockey/evidence/weakside-breakout/node_modules/node-gyp/gyp/pylib/gyp/generator/xcode_test.py +25 -0
- wsba_hockey/evidence/weakside-breakout/node_modules/node-gyp/gyp/pylib/gyp/input.py +3130 -0
- wsba_hockey/evidence/weakside-breakout/node_modules/node-gyp/gyp/pylib/gyp/input_test.py +98 -0
- wsba_hockey/evidence/weakside-breakout/node_modules/node-gyp/gyp/pylib/gyp/mac_tool.py +771 -0
- wsba_hockey/evidence/weakside-breakout/node_modules/node-gyp/gyp/pylib/gyp/msvs_emulation.py +1271 -0
- wsba_hockey/evidence/weakside-breakout/node_modules/node-gyp/gyp/pylib/gyp/ninja_syntax.py +174 -0
- wsba_hockey/evidence/weakside-breakout/node_modules/node-gyp/gyp/pylib/gyp/simple_copy.py +61 -0
- wsba_hockey/evidence/weakside-breakout/node_modules/node-gyp/gyp/pylib/gyp/win_tool.py +374 -0
- wsba_hockey/evidence/weakside-breakout/node_modules/node-gyp/gyp/pylib/gyp/xcode_emulation.py +1939 -0
- wsba_hockey/evidence/weakside-breakout/node_modules/node-gyp/gyp/pylib/gyp/xcode_ninja.py +302 -0
- wsba_hockey/evidence/weakside-breakout/node_modules/node-gyp/gyp/pylib/gyp/xcodeproj_file.py +3197 -0
- wsba_hockey/evidence/weakside-breakout/node_modules/node-gyp/gyp/pylib/gyp/xml_fix.py +65 -0
- wsba_hockey/evidence/weakside-breakout/node_modules/node-gyp/gyp/test_gyp.py +261 -0
- wsba_hockey/evidence/weakside-breakout/node_modules/node-gyp/gyp/tools/graphviz.py +102 -0
- wsba_hockey/evidence/weakside-breakout/node_modules/node-gyp/gyp/tools/pretty_gyp.py +156 -0
- wsba_hockey/evidence/weakside-breakout/node_modules/node-gyp/gyp/tools/pretty_sln.py +181 -0
- wsba_hockey/evidence/weakside-breakout/node_modules/node-gyp/gyp/tools/pretty_vcproj.py +339 -0
- wsba_hockey/evidence/weakside-breakout/node_modules/node-gyp/test/fixtures/test-charmap.py +31 -0
- wsba_hockey/evidence/weakside-breakout/node_modules/node-gyp/update-gyp.py +64 -0
- wsba_hockey/evidence/weakside-breakout/node_modules/sqlite3/node_modules/node-gyp/gyp/gyp_main.py +45 -0
- wsba_hockey/evidence/weakside-breakout/node_modules/sqlite3/node_modules/node-gyp/gyp/pylib/gyp/MSVSNew.py +367 -0
- wsba_hockey/evidence/weakside-breakout/node_modules/sqlite3/node_modules/node-gyp/gyp/pylib/gyp/MSVSProject.py +206 -0
- wsba_hockey/evidence/weakside-breakout/node_modules/sqlite3/node_modules/node-gyp/gyp/pylib/gyp/MSVSSettings.py +1270 -0
- wsba_hockey/evidence/weakside-breakout/node_modules/sqlite3/node_modules/node-gyp/gyp/pylib/gyp/MSVSSettings_test.py +1547 -0
- wsba_hockey/evidence/weakside-breakout/node_modules/sqlite3/node_modules/node-gyp/gyp/pylib/gyp/MSVSToolFile.py +59 -0
- wsba_hockey/evidence/weakside-breakout/node_modules/sqlite3/node_modules/node-gyp/gyp/pylib/gyp/MSVSUserFile.py +153 -0
- wsba_hockey/evidence/weakside-breakout/node_modules/sqlite3/node_modules/node-gyp/gyp/pylib/gyp/MSVSUtil.py +271 -0
- wsba_hockey/evidence/weakside-breakout/node_modules/sqlite3/node_modules/node-gyp/gyp/pylib/gyp/MSVSVersion.py +574 -0
- wsba_hockey/evidence/weakside-breakout/node_modules/sqlite3/node_modules/node-gyp/gyp/pylib/gyp/__init__.py +666 -0
- wsba_hockey/evidence/weakside-breakout/node_modules/sqlite3/node_modules/node-gyp/gyp/pylib/gyp/common.py +654 -0
- wsba_hockey/evidence/weakside-breakout/node_modules/sqlite3/node_modules/node-gyp/gyp/pylib/gyp/common_test.py +78 -0
- wsba_hockey/evidence/weakside-breakout/node_modules/sqlite3/node_modules/node-gyp/gyp/pylib/gyp/easy_xml.py +165 -0
- wsba_hockey/evidence/weakside-breakout/node_modules/sqlite3/node_modules/node-gyp/gyp/pylib/gyp/easy_xml_test.py +109 -0
- wsba_hockey/evidence/weakside-breakout/node_modules/sqlite3/node_modules/node-gyp/gyp/pylib/gyp/flock_tool.py +55 -0
- wsba_hockey/evidence/weakside-breakout/node_modules/sqlite3/node_modules/node-gyp/gyp/pylib/gyp/generator/__init__.py +0 -0
- wsba_hockey/evidence/weakside-breakout/node_modules/sqlite3/node_modules/node-gyp/gyp/pylib/gyp/generator/analyzer.py +808 -0
- wsba_hockey/evidence/weakside-breakout/node_modules/sqlite3/node_modules/node-gyp/gyp/pylib/gyp/generator/android.py +1173 -0
- wsba_hockey/evidence/weakside-breakout/node_modules/sqlite3/node_modules/node-gyp/gyp/pylib/gyp/generator/cmake.py +1321 -0
- wsba_hockey/evidence/weakside-breakout/node_modules/sqlite3/node_modules/node-gyp/gyp/pylib/gyp/generator/compile_commands_json.py +120 -0
- wsba_hockey/evidence/weakside-breakout/node_modules/sqlite3/node_modules/node-gyp/gyp/pylib/gyp/generator/dump_dependency_json.py +103 -0
- wsba_hockey/evidence/weakside-breakout/node_modules/sqlite3/node_modules/node-gyp/gyp/pylib/gyp/generator/eclipse.py +464 -0
- wsba_hockey/evidence/weakside-breakout/node_modules/sqlite3/node_modules/node-gyp/gyp/pylib/gyp/generator/gypd.py +89 -0
- wsba_hockey/evidence/weakside-breakout/node_modules/sqlite3/node_modules/node-gyp/gyp/pylib/gyp/generator/gypsh.py +58 -0
- wsba_hockey/evidence/weakside-breakout/node_modules/sqlite3/node_modules/node-gyp/gyp/pylib/gyp/generator/make.py +2518 -0
- wsba_hockey/evidence/weakside-breakout/node_modules/sqlite3/node_modules/node-gyp/gyp/pylib/gyp/generator/msvs.py +3978 -0
- wsba_hockey/evidence/weakside-breakout/node_modules/sqlite3/node_modules/node-gyp/gyp/pylib/gyp/generator/msvs_test.py +44 -0
- wsba_hockey/evidence/weakside-breakout/node_modules/sqlite3/node_modules/node-gyp/gyp/pylib/gyp/generator/ninja.py +2936 -0
- wsba_hockey/evidence/weakside-breakout/node_modules/sqlite3/node_modules/node-gyp/gyp/pylib/gyp/generator/ninja_test.py +55 -0
- wsba_hockey/evidence/weakside-breakout/node_modules/sqlite3/node_modules/node-gyp/gyp/pylib/gyp/generator/xcode.py +1394 -0
- wsba_hockey/evidence/weakside-breakout/node_modules/sqlite3/node_modules/node-gyp/gyp/pylib/gyp/generator/xcode_test.py +25 -0
- wsba_hockey/evidence/weakside-breakout/node_modules/sqlite3/node_modules/node-gyp/gyp/pylib/gyp/input.py +3137 -0
- wsba_hockey/evidence/weakside-breakout/node_modules/sqlite3/node_modules/node-gyp/gyp/pylib/gyp/input_test.py +98 -0
- wsba_hockey/evidence/weakside-breakout/node_modules/sqlite3/node_modules/node-gyp/gyp/pylib/gyp/mac_tool.py +771 -0
- wsba_hockey/evidence/weakside-breakout/node_modules/sqlite3/node_modules/node-gyp/gyp/pylib/gyp/msvs_emulation.py +1271 -0
- wsba_hockey/evidence/weakside-breakout/node_modules/sqlite3/node_modules/node-gyp/gyp/pylib/gyp/ninja_syntax.py +174 -0
- wsba_hockey/evidence/weakside-breakout/node_modules/sqlite3/node_modules/node-gyp/gyp/pylib/gyp/simple_copy.py +61 -0
- wsba_hockey/evidence/weakside-breakout/node_modules/sqlite3/node_modules/node-gyp/gyp/pylib/gyp/win_tool.py +374 -0
- wsba_hockey/evidence/weakside-breakout/node_modules/sqlite3/node_modules/node-gyp/gyp/pylib/gyp/xcode_emulation.py +1939 -0
- wsba_hockey/evidence/weakside-breakout/node_modules/sqlite3/node_modules/node-gyp/gyp/pylib/gyp/xcode_ninja.py +302 -0
- wsba_hockey/evidence/weakside-breakout/node_modules/sqlite3/node_modules/node-gyp/gyp/pylib/gyp/xcodeproj_file.py +3197 -0
- wsba_hockey/evidence/weakside-breakout/node_modules/sqlite3/node_modules/node-gyp/gyp/pylib/gyp/xml_fix.py +65 -0
- wsba_hockey/evidence/weakside-breakout/node_modules/sqlite3/node_modules/node-gyp/gyp/setup.py +42 -0
- wsba_hockey/evidence/weakside-breakout/node_modules/sqlite3/node_modules/node-gyp/gyp/test_gyp.py +260 -0
- wsba_hockey/evidence/weakside-breakout/node_modules/sqlite3/node_modules/node-gyp/gyp/tools/graphviz.py +102 -0
- wsba_hockey/evidence/weakside-breakout/node_modules/sqlite3/node_modules/node-gyp/gyp/tools/pretty_gyp.py +156 -0
- wsba_hockey/evidence/weakside-breakout/node_modules/sqlite3/node_modules/node-gyp/gyp/tools/pretty_sln.py +181 -0
- wsba_hockey/evidence/weakside-breakout/node_modules/sqlite3/node_modules/node-gyp/gyp/tools/pretty_vcproj.py +339 -0
- wsba_hockey/evidence/weakside-breakout/node_modules/sqlite3/node_modules/node-gyp/test/fixtures/test-charmap.py +31 -0
- wsba_hockey/evidence/weakside-breakout/node_modules/sqlite3/node_modules/node-gyp/update-gyp.py +46 -0
- wsba_hockey/evidence/weakside-breakout/wsba_nhl_apps/wsba_nhl_apps/game_stats/app.py +401 -0
- wsba_hockey/evidence/weakside-breakout/wsba_nhl_apps/wsba_nhl_apps/game_stats/name_fix.py +47 -0
- wsba_hockey/evidence/weakside-breakout/wsba_nhl_apps/wsba_nhl_apps/heatmaps/app.py +108 -0
- wsba_hockey/evidence/weakside-breakout/wsba_nhl_apps/wsba_nhl_apps/heatmaps/plot.py +93 -0
- wsba_hockey/evidence/weakside-breakout/wsba_nhl_apps/wsba_nhl_apps/heatmaps/rink_plot.py +245 -0
- wsba_hockey/evidence/weakside-breakout/wsba_nhl_apps/wsba_nhl_apps/matchups/app.py +145 -0
- wsba_hockey/evidence/weakside-breakout/wsba_nhl_apps/wsba_nhl_apps/matchups/plot.py +77 -0
- wsba_hockey/evidence/weakside-breakout/wsba_nhl_apps/wsba_nhl_apps/matchups/rink_plot.py +245 -0
- wsba_hockey/evidence/weakside-breakout/wsba_nhl_apps/wsba_nhl_apps/pbp/app.py +389 -0
- wsba_hockey/evidence/weakside-breakout/wsba_nhl_apps/wsba_nhl_apps/pbp/plot.py +70 -0
- wsba_hockey/evidence/weakside-breakout/wsba_nhl_apps/wsba_nhl_apps/pbp/rink_plot.py +245 -0
- wsba_hockey/evidence/weakside-breakout/wsba_nhl_apps/wsba_nhl_apps/skater/app.py +110 -0
- wsba_hockey/evidence/weakside-breakout/wsba_nhl_apps/wsba_nhl_apps/skater/plot.py +58 -0
- wsba_hockey/evidence/weakside-breakout/wsba_nhl_apps/wsba_nhl_apps/skater/rink_plot.py +245 -0
- wsba_hockey/tools/agg.py +242 -53
- wsba_hockey/tools/plotting.py +12 -17
- wsba_hockey/tools/scraping.py +149 -258
- wsba_hockey/tools/xg_model.py +357 -311
- wsba_hockey/workspace.py +22 -117
- wsba_hockey/wsba_main.py +493 -165
- {wsba_hockey-1.0.3.dist-info → wsba_hockey-1.0.4.dist-info}/METADATA +1 -1
- wsba_hockey-1.0.4.dist-info/RECORD +135 -0
- {wsba_hockey-1.0.3.dist-info → wsba_hockey-1.0.4.dist-info}/WHEEL +1 -1
- wsba_hockey/stats/calculate_viz/shot_impact.py +0 -2
- wsba_hockey-1.0.3.dist-info/RECORD +0 -19
- {wsba_hockey-1.0.3.dist-info → wsba_hockey-1.0.4.dist-info}/licenses/LICENSE +0 -0
- {wsba_hockey-1.0.3.dist-info → wsba_hockey-1.0.4.dist-info}/top_level.txt +0 -0
wsba_hockey/wsba_main.py
CHANGED
@@ -1,13 +1,12 @@
|
|
1
1
|
import requests as rs
|
2
2
|
import pandas as pd
|
3
|
-
import numpy as np
|
4
|
-
from datetime import datetime, timedelta, date
|
5
3
|
import time
|
6
4
|
import random
|
7
|
-
from
|
8
|
-
from
|
9
|
-
from
|
10
|
-
from
|
5
|
+
from datetime import datetime, timedelta, date
|
6
|
+
from tools.scraping import *
|
7
|
+
from tools.xg_model import *
|
8
|
+
from tools.agg import *
|
9
|
+
from tools.plotting import *
|
11
10
|
|
12
11
|
### WSBA HOCKEY ###
|
13
12
|
## Provided below are all integral functions in the WSBA Hockey Python package. ##
|
@@ -59,7 +58,7 @@ convert_team_abbr = {'L.A':'LAK',
|
|
59
58
|
'T.B':'TBL',
|
60
59
|
'PHX':'ARI'}
|
61
60
|
|
62
|
-
per_sixty = ['Fi','xGi','Gi','A1','A2','P1','P','FF','FA','xGF','xGA','GF','GA']
|
61
|
+
per_sixty = ['Fi','xGi','Gi','A1','A2','P1','P','OZF','NZF','DZF','FF','FA','xGF','xGA','GF','GA','CF','CA','HF','HA','Give','Take','Penl','Penl2','Penl5','Draw','Block']
|
63
62
|
|
64
63
|
#Some games in the API are specifically known to cause errors in scraping.
|
65
64
|
#This list is updated as frequently as necessary
|
@@ -73,15 +72,12 @@ known_probs ={
|
|
73
72
|
'2009020658':'Missing shifts data for game between New York Islanders and Dallas.',
|
74
73
|
'2009020885':'Missing shifts data for game between Sharks and Blue Jackets.',
|
75
74
|
'2010020124':'Game between Capitals and Hurricanes is sporadically missing player on-ice data',
|
75
|
+
'2012020018':'HTML events contain mislabeled events.',
|
76
76
|
'2013020971':'On March 10th, 2014, Stars forward Rich Peverley suffered from a cardiac episode midgame and as a result, the remainder of the game was postponed. \nThe game resumed on April 9th, and the only goal scorer in the game, Blue Jackets forward Nathan Horton, did not appear in the resumed game due to injury. Interestingly, Horton would never play in the NHL again.',
|
77
77
|
'2018021133':'Game between Lightning and Capitals has incorrectly labeled event teams (i.e. WSH TAKEAWAY - #71 CIRELLI (Cirelli is a Tampa Bay skater in this game)).',
|
78
78
|
'2019020876':'Due to the frightening collapse of Blues defensemen Jay Bouwmeester, a game on February 2nd, 2020 between the Ducks and Blues was postponed. \nWhen the game resumed, Ducks defensemen Hampus Lindholm, who assisted on a goal in the inital game, did not play in the resumed match.'
|
79
79
|
}
|
80
80
|
|
81
|
-
name_change = {
|
82
|
-
"":"",
|
83
|
-
}
|
84
|
-
|
85
81
|
shot_types = ['wrist','deflected','tip-in','slap','backhand','snap','wrap-around','poke','bat','cradle','between-legs']
|
86
82
|
|
87
83
|
new = 2024
|
@@ -107,14 +103,17 @@ standings_end = {
|
|
107
103
|
'20242025':'04-17'
|
108
104
|
}
|
109
105
|
|
106
|
+
events = ['faceoff','hit','giveaway','takeaway','blocked-shot','missed-shot','shot-on-goal','goal','penalty']
|
107
|
+
|
110
108
|
## SCRAPE FUNCTIONS ##
|
111
|
-
def nhl_scrape_game(game_ids,split_shifts = False, remove = ['period-start','period-end','challenge','stoppage'],verbose = False, errors = False):
|
109
|
+
def nhl_scrape_game(game_ids,split_shifts = False, remove = ['period-start','period-end','challenge','stoppage','shootout-complete','game-end'],verbose = False, sources = False, errors = False):
|
112
110
|
#Given a set of game_ids (NHL API), return complete play-by-play information as requested
|
113
111
|
# param 'game_ids' - NHL game ids (or list formatted as ['random', num_of_games, start_year, end_year])
|
114
112
|
# param 'split_shifts' - boolean which splits pbp and shift events if true
|
115
113
|
# param 'remove' - list of events to remove from final dataframe
|
116
114
|
# param 'xg' - xG model to apply to pbp for aggregation
|
117
115
|
# param 'verbose' - boolean which adds additional event info if true
|
116
|
+
# param 'sources' - boolean scraping the html and json sources to a master directory if true
|
118
117
|
# param 'errors' - boolean returning game ids which did not scrape if true
|
119
118
|
|
120
119
|
pbps = []
|
@@ -154,6 +153,7 @@ def nhl_scrape_game(game_ids,split_shifts = False, remove = ['period-start','per
|
|
154
153
|
#Scrape each game
|
155
154
|
#Track Errors
|
156
155
|
error_ids = []
|
156
|
+
prog = 0
|
157
157
|
for game_id in game_ids:
|
158
158
|
print("Scraping data from game " + str(game_id) + "...",end="")
|
159
159
|
start = time.perf_counter()
|
@@ -161,15 +161,25 @@ def nhl_scrape_game(game_ids,split_shifts = False, remove = ['period-start','per
|
|
161
161
|
try:
|
162
162
|
#Retrieve data
|
163
163
|
info = get_game_info(game_id)
|
164
|
-
data = combine_data(info)
|
164
|
+
data = combine_data(info, sources)
|
165
165
|
|
166
166
|
#Append data to list
|
167
167
|
pbps.append(data)
|
168
168
|
|
169
169
|
end = time.perf_counter()
|
170
170
|
secs = end - start
|
171
|
-
|
171
|
+
prog += 1
|
172
|
+
|
173
|
+
#Export if sources is true
|
174
|
+
if sources:
|
175
|
+
dirs = f'sources/{info['season']}/'
|
176
|
+
|
177
|
+
if not os.path.exists(dirs):
|
178
|
+
os.makedirs(dirs)
|
172
179
|
|
180
|
+
data.to_csv(f'{dirs}{info['game_id']}.csv',index=False)
|
181
|
+
|
182
|
+
print(f" finished in {secs:.2f} seconds. {prog}/{len(game_ids)} ({(prog/len(game_ids))*100:.2f}%)")
|
173
183
|
except:
|
174
184
|
#Games such as the all-star game and pre-season games will incur this error
|
175
185
|
#Other games have known problems
|
@@ -180,7 +190,7 @@ def nhl_scrape_game(game_ids,split_shifts = False, remove = ['period-start','per
|
|
180
190
|
|
181
191
|
#Track error
|
182
192
|
error_ids.append(game_id)
|
183
|
-
|
193
|
+
|
184
194
|
#Add all pbps together
|
185
195
|
if len(pbps) == 0:
|
186
196
|
print("\rNo data returned.")
|
@@ -256,30 +266,37 @@ def nhl_scrape_schedule(season,start = "09-01", end = "08-01"):
|
|
256
266
|
#Handles dates which are over a year apart
|
257
267
|
day = 365 + day
|
258
268
|
for i in range(day):
|
259
|
-
#For each day, call NHL api and retreive
|
269
|
+
#For each day, call NHL api and retrieve info on all games of selected day
|
260
270
|
inc = start+timedelta(days=i)
|
261
271
|
print("Scraping games on " + str(inc)[:10]+"...")
|
262
272
|
|
263
273
|
get = rs.get(api+str(inc)[:10]).json()
|
264
|
-
gameWeek = list(pd.json_normalize(get['gameWeek'])['games'])[0]
|
265
|
-
|
266
|
-
|
267
|
-
|
268
|
-
|
269
|
-
|
270
|
-
|
271
|
-
|
272
|
-
|
273
|
-
|
274
|
-
|
275
|
-
|
274
|
+
gameWeek = pd.json_normalize(list(pd.json_normalize(get['gameWeek'])['games'])[0])
|
275
|
+
|
276
|
+
#Return nothing if there's nothing
|
277
|
+
if gameWeek.empty:
|
278
|
+
game.append(gameWeek)
|
279
|
+
else:
|
280
|
+
gameWeek['date'] = get['gameWeek'][0]['date']
|
281
|
+
|
282
|
+
gameWeek['season_type'] = gameWeek['gameType']
|
283
|
+
gameWeek['away_team_abbr'] = gameWeek['awayTeam.abbrev']
|
284
|
+
gameWeek['home_team_abbr'] = gameWeek['homeTeam.abbrev']
|
285
|
+
gameWeek['game_title'] = gameWeek['away_team_abbr'] + " @ " + gameWeek['home_team_abbr'] + " - " + gameWeek['date']
|
286
|
+
gameWeek['estStartTime'] = pd.to_datetime(gameWeek['startTimeUTC']).dt.tz_convert('US/Eastern').dt.strftime("%I:%M %p")
|
287
|
+
|
288
|
+
front_col = ['id','season','date','season_type','game_title','away_team_abbr','home_team_abbr','estStartTime']
|
289
|
+
gameWeek = gameWeek[front_col+[col for col in gameWeek.columns.to_list() if col not in front_col]]
|
290
|
+
|
291
|
+
game.append(gameWeek)
|
292
|
+
|
276
293
|
#Concatenate all games
|
277
294
|
df = pd.concat(game)
|
278
295
|
|
279
296
|
#Return: specified schedule data
|
280
297
|
return df
|
281
298
|
|
282
|
-
def nhl_scrape_season(season,split_shifts = False, season_types = [2,3], remove = ['period-start','period-end','game-end','challenge','stoppage'], start = "09-01", end = "08-01", local=False, local_path = "schedule/schedule.csv", verbose = False, errors = False):
|
299
|
+
def nhl_scrape_season(season,split_shifts = False, season_types = [2,3], remove = ['period-start','period-end','game-end','challenge','stoppage'], start = "09-01", end = "08-01", local=False, local_path = "schedule/schedule.csv", verbose = False, sources = False, errors = False):
|
283
300
|
#Given season, scrape all play-by-play occurring within the season
|
284
301
|
# param 'season' - NHL season to scrape
|
285
302
|
# param 'split_shifts' - boolean which splits pbp and shift events if true
|
@@ -289,12 +306,21 @@ def nhl_scrape_season(season,split_shifts = False, season_types = [2,3], remove
|
|
289
306
|
# param 'local' - boolean indicating whether to use local file to scrape game_ids
|
290
307
|
# param 'local_path' - path of local file
|
291
308
|
# param 'verbose' - boolean which adds additional event info if true
|
309
|
+
# param 'sources' - boolean scraping the html and json sources to a master directory if true
|
292
310
|
# param 'errors' - boolean returning game ids which did not scrape if true
|
293
311
|
|
294
312
|
#Determine whether to use schedule data in repository or to scrape
|
295
|
-
if local
|
313
|
+
if local:
|
296
314
|
load = pd.read_csv(local_path)
|
297
|
-
load
|
315
|
+
load['date'] = pd.to_datetime(load['date'])
|
316
|
+
|
317
|
+
start = f'{(season[0:4] if int(start[0:2])>=9 else season[4:8])}-{int(start[0:2])}-{int(start[3:5])}'
|
318
|
+
end = f'{(season[0:4] if int(end[0:2])>=9 else season[4:8])}-{int(end[0:2])}-{int(end[3:5])}'
|
319
|
+
|
320
|
+
load = load.loc[(load['season'].astype(str)==season)&
|
321
|
+
(load['season_type'].isin(season_types))&
|
322
|
+
(load['date']>=start)&(load['date']<=end)]
|
323
|
+
|
298
324
|
game_ids = list(load['id'].astype(str))
|
299
325
|
else:
|
300
326
|
load = nhl_scrape_schedule(season,start,end)
|
@@ -310,31 +336,17 @@ def nhl_scrape_season(season,split_shifts = False, season_types = [2,3], remove
|
|
310
336
|
start = time.perf_counter()
|
311
337
|
|
312
338
|
#Perform scrape
|
313
|
-
if split_shifts
|
314
|
-
data = nhl_scrape_game(game_ids,split_shifts=True,remove=remove,verbose=verbose,errors=errors)
|
339
|
+
if split_shifts:
|
340
|
+
data = nhl_scrape_game(game_ids,split_shifts=True,remove=remove,verbose=verbose,sources=sources,errors=errors)
|
315
341
|
else:
|
316
|
-
data = nhl_scrape_game(game_ids,remove=remove,verbose=verbose,errors=errors)
|
342
|
+
data = nhl_scrape_game(game_ids,remove=remove,verbose=verbose,sources=sources,errors=errors)
|
317
343
|
|
318
344
|
end = time.perf_counter()
|
319
345
|
secs = end - start
|
320
346
|
|
321
347
|
print(f'Finished season scrape in {(secs/60)/60:.2f} hours.')
|
322
348
|
#Return: Complete pbp and shifts data for specified season as well as dataframe of game_ids which failed to return data
|
323
|
-
|
324
|
-
pbp_dict = {'pbp':data['pbp'],
|
325
|
-
'shifts':data['shifts']}
|
326
|
-
|
327
|
-
if errors:
|
328
|
-
pbp_dict.update({'errors':data['errors']})
|
329
|
-
return pbp_dict
|
330
|
-
else:
|
331
|
-
pbp = data
|
332
|
-
if errors:
|
333
|
-
pbp_dict = {'pbp':pbp,
|
334
|
-
'errors':data['errors']}
|
335
|
-
return pbp_dict
|
336
|
-
else:
|
337
|
-
return pbp
|
349
|
+
return data
|
338
350
|
|
339
351
|
def nhl_scrape_seasons_info(seasons = []):
|
340
352
|
#Returns info related to NHL seasons (by default, all seasons are included)
|
@@ -449,17 +461,24 @@ def nhl_scrape_team_info(country = False):
|
|
449
461
|
|
450
462
|
return data.sort_values(by=(['country3Code','countryCode','iocCode','countryName'] if country else ['fullName','triCode','id']))
|
451
463
|
|
452
|
-
def nhl_scrape_player_data(
|
464
|
+
def nhl_scrape_player_data(player_ids):
|
453
465
|
#Given player id, return player information
|
454
|
-
|
466
|
+
infos = []
|
467
|
+
for player_id in player_ids:
|
468
|
+
player_id = int(player_id)
|
469
|
+
api = f'https://api-web.nhle.com/v1/player/{player_id}/landing'
|
470
|
+
|
471
|
+
data = pd.json_normalize(rs.get(api).json())
|
455
472
|
|
456
|
-
|
473
|
+
#Add name column
|
474
|
+
data['fullName'] = (data['firstName.default'] + " " + data['lastName.default']).str.upper()
|
457
475
|
|
458
|
-
|
459
|
-
|
476
|
+
#Append
|
477
|
+
infos.append(data)
|
460
478
|
|
479
|
+
df = pd.concat(infos)
|
461
480
|
#Return: player data
|
462
|
-
return
|
481
|
+
return df
|
463
482
|
|
464
483
|
def nhl_scrape_draft_rankings(arg = 'now', category = ''):
|
465
484
|
#Given url argument for timeframe and prospect category, return draft rankings
|
@@ -478,12 +497,26 @@ def nhl_scrape_draft_rankings(arg = 'now', category = ''):
|
|
478
497
|
#Return: prospect rankings
|
479
498
|
return data
|
480
499
|
|
481
|
-
def
|
500
|
+
def nhl_apply_xG(pbp):
|
501
|
+
#Given play-by-play data, return this data with xG-related columns
|
502
|
+
|
503
|
+
#param 'pbp' - play-by-play data
|
504
|
+
|
505
|
+
print(f'Applying WSBA xG to model with seasons: {pbp['season'].drop_duplicates().to_list()}')
|
506
|
+
#Fix player data
|
507
|
+
#pbp = fix_players(pbp)
|
508
|
+
|
509
|
+
#Apply xG model
|
510
|
+
pbp = wsba_xG(pbp)
|
511
|
+
|
512
|
+
return pbp
|
513
|
+
|
514
|
+
def nhl_shooting_impacts(agg,type):
|
482
515
|
#Given stats table generated from the nhl_calculate_stats function, return table with shot impacts
|
483
516
|
#Only 5v5 is supported as of now
|
484
517
|
|
485
518
|
#param 'agg' - stats table
|
486
|
-
#param '
|
519
|
+
#param 'type' - type of stats to calculate ('skater', 'goalie', or 'team')
|
487
520
|
|
488
521
|
#COMPOSITE IMPACT EVALUATIONS:
|
489
522
|
|
@@ -509,7 +542,7 @@ def nhl_shooting_impacts(agg,team=False):
|
|
509
542
|
|
510
543
|
return rate+qual+fini
|
511
544
|
|
512
|
-
if
|
545
|
+
if type == 'goalie':
|
513
546
|
pos = agg
|
514
547
|
for group in [('OOFF','F'),('ODEF','A')]:
|
515
548
|
#Have to set these columns for compatibility with df.apply
|
@@ -536,35 +569,172 @@ def nhl_shooting_impacts(agg,team=False):
|
|
536
569
|
pos[f'{group[0]}-SRI'] = pos['g'] - pos.apply(lambda x: goal_comp(avg_fen,x.xg_fen,x.xg,x.g,avg_fsh),axis=1)
|
537
570
|
pos[f'{group[0]}-SQI'] = pos['g'] - pos.apply(lambda x: goal_comp(x.fenwick,avg_xg_fen,x.xg,x.g,avg_fsh),axis=1)
|
538
571
|
pos[f'{group[0]}-FNI'] = pos['g'] - pos.apply(lambda x: goal_comp(x.fenwick,x.xg_fen,avg_xg,avg_g,avg_fsh),axis=1)
|
572
|
+
|
573
|
+
#Convert impacts to totals
|
574
|
+
#Calculate shot rate, shot quality, and finishing impacts
|
575
|
+
pos[f'{group[0]}-SRI-T'] = (pos[f'{group[0]}-SRI']/60)*pos['TOI']
|
576
|
+
pos[f'{group[0]}-SQI-T'] = (pos[f'{group[0]}-SQI']/60)*pos['TOI']
|
577
|
+
pos[f'{group[0]}-FNI-T'] = (pos[f'{group[0]}-FNI']/60)*pos['TOI']
|
539
578
|
|
579
|
+
#Rank per 60 stats
|
580
|
+
for stat in ['FF','FA','xGF','xGA','GF','GA','CF','CA','GSAx']:
|
581
|
+
pos[f'{stat}/60-P'] = pos[f'{stat}/60'].rank(pct=True)
|
582
|
+
|
583
|
+
#Flip percentiles for against stats
|
584
|
+
for stat in ['FA','xGA','GA','CA']:
|
585
|
+
pos[f'{stat}/60-P'] = 1-pos[f'{stat}/60-P']
|
586
|
+
|
540
587
|
#Add extra metrics
|
541
588
|
pos['RushF/60'] = (pos['RushF']/pos['TOI'])*60
|
542
589
|
pos['RushA/60'] = (pos['RushA']/pos['TOI'])*60
|
543
|
-
pos['
|
544
|
-
pos['
|
590
|
+
pos['RushesFF'] = pos['RushF/60'].rank(pct=True)
|
591
|
+
pos['RushesFA'] = 1 - pos['RushA/60'].rank(pct=True)
|
545
592
|
pos['RushFxG/60'] = (pos['RushFxG']/pos['TOI'])*60
|
546
593
|
pos['RushAxG/60'] = (pos['RushAxG']/pos['TOI'])*60
|
547
|
-
pos['
|
548
|
-
pos['
|
594
|
+
pos['RushesxGF'] = pos['RushFxG/60'].rank(pct=True)
|
595
|
+
pos['RushesxGA'] = 1 - pos['RushAxG/60'].rank(pct=True)
|
549
596
|
pos['RushFG/60'] = (pos['RushFG']/pos['TOI'])*60
|
550
597
|
pos['RushAG/60'] = (pos['RushAG']/pos['TOI'])*60
|
551
|
-
pos['
|
552
|
-
pos['
|
598
|
+
pos['RushesGF'] = pos['RushFG/60'].rank(pct=True)
|
599
|
+
pos['RushesGA'] = 1 - pos['RushAG/60'].rank(pct=True)
|
553
600
|
|
554
601
|
#Flip against metric percentiles
|
555
602
|
pos['ODEF-SR'] = 1-pos['ODEF-SR']
|
556
603
|
pos['ODEF-SQ'] = 1-pos['ODEF-SQ']
|
557
604
|
pos['ODEF-FN'] = 1-pos['ODEF-FN']
|
558
605
|
|
606
|
+
#Extraneous Values
|
607
|
+
pos['EGF'] = pos['OOFF-SRI']+pos['OOFF-SQI']+pos['OOFF-FNI']
|
608
|
+
pos['ExGF'] = pos['OOFF-SRI']+pos['OOFF-SQI']
|
609
|
+
pos['EGA'] = pos['ODEF-SRI']+pos['ODEF-SQI']+pos['ODEF-FNI']
|
610
|
+
pos['ExGA'] = pos['ODEF-SRI']+pos['ODEF-SQI']
|
611
|
+
|
612
|
+
#...and their percentiles
|
613
|
+
pos['EGF-P'] = pos['EGF'].rank(pct=True)
|
614
|
+
pos['ExGF-P'] = pos['ExGF'].rank(pct=True)
|
615
|
+
pos['EGA-P'] = pos['EGA'].rank(pct=True)
|
616
|
+
pos['ExGA-P'] = pos['ExGA'].rank(pct=True)
|
617
|
+
|
618
|
+
pos['EGA-P'] = 1-pos['EGA']
|
619
|
+
pos['ExGA-P'] = 1-pos['ExGA']
|
620
|
+
|
621
|
+
#...and then their totals
|
622
|
+
pos['EGF-T'] = (pos['EGF']/60)*pos['TOI']
|
623
|
+
pos['ExGF-T'] = (pos['ExGF']/60)*pos['TOI']
|
624
|
+
pos['EGA-T'] = (pos['EGA']/60)*pos['TOI']
|
625
|
+
pos['ExGA-T'] = (pos['ExGA']/60)*pos['TOI']
|
626
|
+
|
627
|
+
#Goal Composites...
|
628
|
+
pos['Team-Adjusted-EGI'] = pos['ODEF-FNI']-pos['ExGA']
|
629
|
+
pos['GISAx'] = pos['ExGA']-pos['EGA']
|
630
|
+
pos['NetGI'] = pos['EGF'] - pos['EGA']
|
631
|
+
pos['NetxGI'] = pos['ExGF'] - pos['ExGA']
|
632
|
+
|
633
|
+
#...and their percentiles
|
634
|
+
pos['Team-Adjusted-EGI-P'] = pos['Team-Adjusted-EGI'].rank(pct=True)
|
635
|
+
pos['GISAx-P'] = pos['GISAx'].rank(pct=True)
|
636
|
+
pos['NetGI-P'] = pos['NetGI'].rank(pct=True)
|
637
|
+
pos['NetxGI-P'] = pos['NetxGI'].rank(pct=True)
|
638
|
+
|
639
|
+
#...and then their totals
|
640
|
+
pos['Team-Adjusted-EGI-T'] = (pos['Team-Adjusted-EGI']/60)*pos['TOI']
|
641
|
+
pos['GISAx-T'] = (pos['GISAx']/60)*pos['TOI']
|
642
|
+
pos['NetGI-T'] = (pos['NetGI']/60)*pos['TOI']
|
643
|
+
pos['NetxGI-T'] = (pos['NetxGI']/60)*pos['TOI']
|
644
|
+
|
559
645
|
#Return: team stats with shooting impacts
|
560
|
-
return pos.drop(columns=['fsh','fenwick','xg_fen','xg','g','finishing']).sort_values(['Season','Team'])
|
646
|
+
return pos.drop(columns=['fsh','fenwick','xg_fen','xg','g','finishing']).sort_values(['Goalie','Season','Team'])
|
647
|
+
|
648
|
+
elif type =='team':
|
649
|
+
pos = agg
|
650
|
+
for group in [('OOFF','F'),('ODEF','A')]:
|
651
|
+
#Have to set these columns for compatibility with df.apply
|
652
|
+
pos['fsh'] = pos[f'Fsh{group[1]}%']
|
653
|
+
pos['fenwick'] = pos[f'F{group[1]}/60']
|
654
|
+
pos['xg'] = pos[f'xG{group[1]}/60']
|
655
|
+
pos['g'] = pos[f'G{group[1]}/60']
|
656
|
+
pos['xg_fen'] = pos[f'xG{group[1]}/F{group[1]}']
|
657
|
+
pos['finishing'] = pos[f'G{group[1]}/xG{group[1]}']
|
658
|
+
|
659
|
+
#Find average for position in frame
|
660
|
+
avg_fen = pos['fenwick'].mean()
|
661
|
+
avg_xg = pos['xg'].mean()
|
662
|
+
avg_g = pos['g'].mean()
|
663
|
+
avg_fsh = avg_g/avg_fen
|
664
|
+
avg_xg_fen = avg_xg/avg_fen
|
561
665
|
|
666
|
+
#Calculate composite percentiles
|
667
|
+
pos[f'{group[0]}-SR'] = pos['fenwick'].rank(pct=True)
|
668
|
+
pos[f'{group[0]}-SQ'] = pos['xg_fen'].rank(pct=True)
|
669
|
+
pos[f'{group[0]}-FN'] = pos['finishing'].rank(pct=True)
|
670
|
+
|
671
|
+
#Calculate shot rate, shot quality, and finishing impacts
|
672
|
+
pos[f'{group[0]}-SRI'] = pos['g'] - pos.apply(lambda x: goal_comp(avg_fen,x.xg_fen,x.xg,x.g,avg_fsh),axis=1)
|
673
|
+
pos[f'{group[0]}-SQI'] = pos['g'] - pos.apply(lambda x: goal_comp(x.fenwick,avg_xg_fen,x.xg,x.g,avg_fsh),axis=1)
|
674
|
+
pos[f'{group[0]}-FNI'] = pos['g'] - pos.apply(lambda x: goal_comp(x.fenwick,x.xg_fen,avg_xg,avg_g,avg_fsh),axis=1)
|
675
|
+
|
676
|
+
#Convert impacts to totals
|
677
|
+
#Each total is the per-60 impact divided by 60 and multiplied by TOI
|
678
|
+
pos[f'{group[0]}-SRI-T'] = (pos[f'{group[0]}-SRI']/60)*pos['TOI']
|
679
|
+
pos[f'{group[0]}-SQI-T'] = (pos[f'{group[0]}-SQI']/60)*pos['TOI']
|
680
|
+
pos[f'{group[0]}-FNI-T'] = (pos[f'{group[0]}-FNI']/60)*pos['TOI']
|
681
|
+
|
682
|
+
#Rank per 60 stats
|
683
|
+
for stat in per_sixty[10:len(per_sixty)]:
|
684
|
+
pos[f'{stat}/60-P'] = pos[f'{stat}/60'].rank(pct=True)
|
685
|
+
|
686
|
+
#Flip percentiles for against stats
|
687
|
+
for stat in ['FA','xGA','GA','CA','HA','Give','Penl','Penl2','Penl5']:
|
688
|
+
pos[f'{stat}/60-P'] = 1-pos[f'{stat}/60-P']
|
689
|
+
|
690
|
+
#Add extra metrics
|
691
|
+
pos['RushF/60'] = (pos['RushF']/pos['TOI'])*60
|
692
|
+
pos['RushA/60'] = (pos['RushA']/pos['TOI'])*60
|
693
|
+
pos['RushesFF'] = pos['RushF/60'].rank(pct=True)
|
694
|
+
pos['RushesFA'] = 1 - pos['RushA/60'].rank(pct=True)
|
695
|
+
pos['RushFxG/60'] = (pos['RushFxG']/pos['TOI'])*60
|
696
|
+
pos['RushAxG/60'] = (pos['RushAxG']/pos['TOI'])*60
|
697
|
+
pos['RushesxGF'] = pos['RushFxG/60'].rank(pct=True)
|
698
|
+
pos['RushesxGA'] = 1 - pos['RushAxG/60'].rank(pct=True)
|
699
|
+
pos['RushFG/60'] = (pos['RushFG']/pos['TOI'])*60
|
700
|
+
pos['RushAG/60'] = (pos['RushAG']/pos['TOI'])*60
|
701
|
+
pos['RushesGF'] = pos['RushFG/60'].rank(pct=True)
|
702
|
+
pos['RushesGA'] = 1 - pos['RushAG/60'].rank(pct=True)
|
703
|
+
|
704
|
+
#Flip against metric percentiles
|
705
|
+
pos['ODEF-SR'] = 1-pos['ODEF-SR']
|
706
|
+
pos['ODEF-SQ'] = 1-pos['ODEF-SQ']
|
707
|
+
pos['ODEF-FN'] = 1-pos['ODEF-FN']
|
708
|
+
|
709
|
+
pos['EGF'] = pos['OOFF-SRI']+pos['OOFF-SQI']+pos['OOFF-FNI']
|
710
|
+
pos['ExGF'] = pos['OOFF-SRI']+pos['OOFF-SQI']
|
711
|
+
pos['EGA'] = pos['ODEF-SRI']+pos['ODEF-SQI']+pos['ODEF-FNI']
|
712
|
+
pos['ExGA'] = pos['ODEF-SRI']+pos['ODEF-SQI']
|
713
|
+
|
714
|
+
#...and their percentiles
|
715
|
+
pos['EGF-P'] = pos['EGF'].rank(pct=True)
|
716
|
+
pos['ExGF-P'] = pos['ExGF'].rank(pct=True)
|
717
|
+
pos['EGA-P'] = pos['EGA'].rank(pct=True)
|
718
|
+
pos['ExGA-P'] = pos['ExGA'].rank(pct=True)
|
719
|
+
|
720
|
+
pos['EGA-P'] = 1-pos['EGA-P'] #Flip the percentile itself; subtracting the raw EGA value would replace the rank with a non-percentile
|
721
|
+
pos['ExGA-P'] = 1-pos['ExGA-P'] #Flip the percentile itself; subtracting the raw ExGA value would replace the rank with a non-percentile
|
722
|
+
|
723
|
+
#...and then their totals
|
724
|
+
pos['EGF-T'] = (pos['EGF']/60)*pos['TOI']
|
725
|
+
pos['ExGF-T'] = (pos['ExGF']/60)*pos['TOI']
|
726
|
+
pos['EGA-T'] = (pos['EGA']/60)*pos['TOI']
|
727
|
+
pos['ExGA-T'] = (pos['ExGA']/60)*pos['TOI']
|
728
|
+
|
729
|
+
#Return: team stats with shooting impacts
|
730
|
+
return pos.drop(columns=['fsh','fenwick','xg_fen','xg','g','finishing']).sort_values(['Season','Team'])
|
562
731
|
|
563
732
|
else:
|
564
733
|
#Remove skaters with less than 150 minutes of TOI then split between forwards and dmen
|
565
|
-
|
566
|
-
forwards = agg.loc[agg['Position']!='D']
|
567
|
-
defensemen = agg.loc[agg['Position']=='D']
|
734
|
+
#These are added back in after the fact
|
735
|
+
forwards = agg.loc[(agg['Position']!='D')&(agg['TOI']>=150)]
|
736
|
+
defensemen = agg.loc[(agg['Position']=='D')&(agg['TOI']>=150)]
|
737
|
+
non_players = agg.loc[agg['TOI']<150]
|
568
738
|
|
569
739
|
#Loop through both positions, all groupings (INDV, OOFF, and ODEF) generating impacts
|
570
740
|
for pos in [forwards,defensemen]:
|
@@ -594,15 +764,29 @@ def nhl_shooting_impacts(agg,team=False):
|
|
594
764
|
pos[f'{group[0]}-SQI'] = pos['g'] - pos.apply(lambda x: goal_comp(x.fenwick,avg_xg_fen,x.xg,x.g,avg_fsh),axis=1)
|
595
765
|
pos[f'{group[0]}-FNI'] = pos['g'] - pos.apply(lambda x: goal_comp(x.fenwick,x.xg_fen,avg_xg,avg_g,avg_fsh),axis=1)
|
596
766
|
|
767
|
+
#Convert impacts to totals
|
768
|
+
#Each total is the per-60 impact divided by 60 and multiplied by TOI
|
769
|
+
pos[f'{group[0]}-SRI-T'] = (pos[f'{group[0]}-SRI']/60)*pos['TOI']
|
770
|
+
pos[f'{group[0]}-SQI-T'] = (pos[f'{group[0]}-SQI']/60)*pos['TOI']
|
771
|
+
pos[f'{group[0]}-FNI-T'] = (pos[f'{group[0]}-FNI']/60)*pos['TOI']
|
772
|
+
|
597
773
|
#Calculate On-Ice Involvement Percentiles
|
598
|
-
pos['
|
599
|
-
pos['
|
600
|
-
pos['
|
601
|
-
pos['
|
602
|
-
pos['
|
603
|
-
pos['
|
604
|
-
pos['
|
605
|
-
pos['
|
774
|
+
pos['Fi/F'] = pos['FC%'].rank(pct=True)
|
775
|
+
pos['xGi/F'] = pos['xGC%'].rank(pct=True)
|
776
|
+
pos['Pi/F'] = pos['GI%'].rank(pct=True)
|
777
|
+
pos['Gi/F'] = pos['GC%'].rank(pct=True)
|
778
|
+
pos['RushFi/60'] = (pos['Rush']/pos['TOI'])*60
|
779
|
+
pos['RushxGi/60'] = (pos['Rush xG']/pos['TOI'])*60
|
780
|
+
pos['RushesxGi'] = pos['RushxGi/60'].rank(pct=True)
|
781
|
+
pos['RushesFi'] = pos['RushFi/60'].rank(pct=True)
|
782
|
+
|
783
|
+
#Rank per 60 stats
|
784
|
+
for stat in per_sixty:
|
785
|
+
pos[f'{stat}/60-P'] = pos[f'{stat}/60'].rank(pct=True)
|
786
|
+
|
787
|
+
#Flip percentiles for against stats
|
788
|
+
for stat in ['FA','xGA','GA','CA','HA','Give','Penl','Penl2','Penl5']:
|
789
|
+
pos[f'{stat}/60-P'] = 1-pos[f'{stat}/60-P']
|
606
790
|
|
607
791
|
#Add positions back together
|
608
792
|
complete = pd.concat([forwards,defensemen])
|
@@ -613,108 +797,253 @@ def nhl_shooting_impacts(agg,team=False):
|
|
613
797
|
complete['ODEF-FN'] = 1-complete['ODEF-FN']
|
614
798
|
|
615
799
|
#Extraneous Values
|
616
|
-
complete['
|
617
|
-
complete['
|
618
|
-
complete['
|
619
|
-
complete['
|
620
|
-
complete['
|
621
|
-
complete['
|
622
|
-
|
623
|
-
|
624
|
-
complete['
|
625
|
-
complete['
|
626
|
-
complete['
|
627
|
-
complete['
|
628
|
-
complete['
|
629
|
-
complete['
|
630
|
-
|
800
|
+
complete['EGi'] = complete['INDV-SRI']+complete['INDV-SQI']+complete['INDV-FNI']
|
801
|
+
complete['ExGi'] = complete['INDV-SRI']+complete['INDV-SQI']
|
802
|
+
complete['EGF'] = complete['OOFF-SRI']+complete['OOFF-SQI']+complete['OOFF-FNI']
|
803
|
+
complete['ExGF'] = complete['OOFF-SRI']+complete['OOFF-SQI']
|
804
|
+
complete['EGA'] = complete['ODEF-SRI']+complete['ODEF-SQI']+complete['ODEF-FNI']
|
805
|
+
complete['ExGA'] = complete['ODEF-SRI']+complete['ODEF-SQI']
|
806
|
+
|
807
|
+
#...and their percentiles
|
808
|
+
complete['EGi-P'] = complete['EGi'].rank(pct=True)
|
809
|
+
complete['ExGi-P'] = complete['ExGi'].rank(pct=True)
|
810
|
+
complete['EGF-P'] = complete['EGF'].rank(pct=True)
|
811
|
+
complete['ExGF-P'] = complete['ExGF'].rank(pct=True)
|
812
|
+
complete['EGA-P'] = complete['EGA'].rank(pct=True)
|
813
|
+
complete['ExGA-P'] = complete['ExGA'].rank(pct=True)
|
814
|
+
|
815
|
+
complete['EGA-P'] = 1-complete['EGA-P'] #Flip the percentile itself; subtracting the raw EGA value would replace the rank with a non-percentile
|
816
|
+
complete['ExGA-P'] = 1-complete['ExGA-P'] #Flip the percentile itself; subtracting the raw ExGA value would replace the rank with a non-percentile
|
817
|
+
|
818
|
+
#...and then their totals
|
819
|
+
complete['EGi-T'] = (complete['EGi']/60)*complete['TOI']
|
820
|
+
complete['ExGi-T'] = (complete['ExGi']/60)*complete['TOI']
|
821
|
+
complete['EGF-T'] = (complete['EGF']/60)*complete['TOI']
|
822
|
+
complete['ExGF-T'] = (complete['ExGF']/60)*complete['TOI']
|
823
|
+
complete['EGA-T'] = (complete['EGA']/60)*complete['TOI']
|
824
|
+
complete['ExGA-T'] = (complete['ExGA']/60)*complete['TOI']
|
825
|
+
|
826
|
+
#Goal Composites...
|
827
|
+
complete['LiEG'] = complete['EGF'] - complete['EGi']
|
828
|
+
complete['LiExG'] = complete['ExGF'] - complete['ExGi']
|
829
|
+
complete['LiGIn'] = complete['LiEG']*complete['AC%']
|
830
|
+
complete['LixGIn'] = complete['LiExG']*complete['AC%']
|
831
|
+
complete['ALiGIn'] = complete['LiGIn']-complete['LixGIn']
|
832
|
+
complete['CompGI'] = complete['EGi'] + complete['LiGIn']
|
833
|
+
complete['LiRelGI'] = complete['CompGI'] - (complete['EGF']-complete['CompGI'])
|
834
|
+
complete['NetGI'] = complete['EGF'] - complete['EGA']
|
835
|
+
complete['NetxGI'] = complete['ExGF'] - complete['ExGA']
|
836
|
+
|
837
|
+
#...and their percentiles
|
838
|
+
complete['LiEG-P'] = complete['LiEG'].rank(pct=True)
|
839
|
+
complete['LiExG-P'] = complete['LiExG'].rank(pct=True)
|
840
|
+
complete['LiGIn-P'] = complete['LiGIn'].rank(pct=True)
|
841
|
+
complete['LixGIn-P'] = complete['LixGIn'].rank(pct=True)
|
842
|
+
complete['ALiGIn-P'] = complete['ALiGIn'].rank(pct=True)
|
843
|
+
complete['CompGI-P'] = complete['CompGI'].rank(pct=True)
|
844
|
+
complete['LiRelGI-P'] = complete['LiRelGI'].rank(pct=True)
|
845
|
+
complete['NetGI-P'] = complete['NetGI'].rank(pct=True)
|
846
|
+
complete['NetxGI-P'] = complete['NetxGI'].rank(pct=True)
|
847
|
+
|
848
|
+
#..and then their totals
|
849
|
+
complete['LiEG-T'] = (complete['LiEG']/60)*complete['TOI']
|
850
|
+
complete['LiExG-T'] = (complete['LiExG']/60)*complete['TOI']
|
851
|
+
complete['LiGIn-T'] = (complete['LiGIn']/60)*complete['TOI']
|
852
|
+
complete['LixGIn-T'] = (complete['LixGIn']/60)*complete['TOI']
|
853
|
+
complete['ALiGIn-T'] = (complete['ALiGIn']/60)*complete['TOI']
|
854
|
+
complete['CompGI-T'] = (complete['CompGI']/60)*complete['TOI']
|
855
|
+
complete['LiRelGI-T'] = (complete['LiRelGI']/60)*complete['TOI']
|
856
|
+
complete['NetGI-T'] = (complete['NetGI']/60)*complete['TOI']
|
857
|
+
complete['NetxGI-T'] = (complete['NetxGI']/60)*complete['TOI']
|
858
|
+
|
859
|
+
#Add back skaters with less than 150 minutes TOI
|
860
|
+
df = pd.concat([complete,non_players]).drop(columns=['fsh','fenwick','xg_fen','xg','g','finishing']).sort_values(['Player','Season','Team','ID'])
|
631
861
|
#Return: skater stats with shooting impacts
|
632
|
-
return
|
862
|
+
return df
|
633
863
|
|
634
|
-
def nhl_calculate_stats(pbp,type,season_types,game_strength,roster_path="rosters/nhl_rosters.csv",
|
864
|
+
def nhl_calculate_stats(pbp,type,season_types,game_strength,split_game=False,roster_path="rosters/nhl_rosters.csv",shot_impact=False):
|
635
865
|
#Given play-by-play, seasonal information, game_strength, rosters, and xG model, return aggregated stats
|
636
866
|
# param 'pbp' - play-by-play dataframe
|
637
|
-
# param 'type' - type of stats to calculate ('skater', '
|
867
|
+
# param 'type' - type of stats to calculate ('skater', 'goalie', or 'team')
|
638
868
|
# param 'season' - season or timeframe of events in play-by-play
|
639
869
|
# param 'season_types' - list of season types (preseason, regular season, or playoffs) to include in aggregation
|
640
870
|
# param 'game_strength' - list of game strengths to include in aggregation, or 'all'
|
871
|
+
# param 'split_game' - boolean which if true groups aggregation by game
|
641
872
|
# param 'roster_path' - path to roster file
|
642
|
-
# param 'xg' - xG model to apply to pbp for aggregation
|
643
873
|
# param 'shot_impact' - boolean determining if the shot impact model will be applied to the dataset
|
644
874
|
|
645
|
-
print(f"Calculating statistics for all games in the provided play-by-play data for {type}s...\nSeasons included: {pbp['season'].drop_duplicates().to_list()}...")
|
875
|
+
print(f"Calculating statistics for all games in the provided play-by-play data at {game_strength} for {type}s...\nSeasons included: {pbp['season'].drop_duplicates().to_list()}...")
|
646
876
|
start = time.perf_counter()
|
647
877
|
|
648
|
-
#Add extra data and apply team changes
|
649
|
-
pbp = prep_xG_data(pbp).replace(convert_team_abbr)
|
650
|
-
|
651
878
|
#Check if xG column exists and apply model if it does not
|
652
879
|
try:
|
653
880
|
pbp['xG']
|
654
|
-
except KeyError:
|
655
|
-
|
656
|
-
pbp = wsba_xG(pbp)
|
657
|
-
else:
|
658
|
-
pbp = moneypuck_xG(pbp)
|
881
|
+
except KeyError:
|
882
|
+
pbp = wsba_xG(pbp)
|
659
883
|
|
660
|
-
#Filter by season types and remove
|
661
|
-
|
884
|
+
#Filter by season types, remove shootouts, remove shots with no coordinates, and remove shots on empty nets
|
885
|
+
pbp_noshot = pbp.loc[(pbp['season_type'].isin(season_types)) & ~(pbp['event_type'].isin(fenwick_events))]
|
886
|
+
|
887
|
+
#Include everything when strengths is set to 'all'
|
888
|
+
if game_strength == 'all':
|
889
|
+
mask = ((pbp['event_type'].isin(fenwick_events)) & (pbp['empty_net']<1))
|
890
|
+
else:
|
891
|
+
mask = ((pbp['event_type'].isin(fenwick_events)) & (pbp['empty_net']<1) & (pbp['x'].notna()) & (pbp['y'].notna()))
|
662
892
|
|
893
|
+
pbp_shot = pbp.loc[(pbp['season_type'].isin(season_types)) & mask]
|
894
|
+
|
895
|
+
pbp = pd.concat([pbp_shot,pbp_noshot])
|
896
|
+
|
663
897
|
#Convert all columns with player ids to float in order to avoid merging errors
|
664
898
|
for col in get_col():
|
665
899
|
if "_id" in col:
|
666
900
|
try: pbp[col] = pbp[col].astype(float)
|
667
901
|
except KeyError: continue
|
668
902
|
|
669
|
-
#
|
670
|
-
if
|
671
|
-
|
903
|
+
#Split by game if specified
|
904
|
+
if split_game:
|
905
|
+
second_group = ['season','game_id']
|
906
|
+
else:
|
907
|
+
second_group = ['season']
|
672
908
|
|
673
909
|
#Split calculation
|
674
|
-
if type == '
|
675
|
-
complete =
|
676
|
-
|
677
|
-
#WSBA
|
678
|
-
complete['WSBA'] = complete['Team']+complete['Season'].astype(str)
|
910
|
+
if type == 'goalie':
|
911
|
+
complete = calc_goalie(pbp,game_strength,second_group)
|
679
912
|
|
680
913
|
#Set TOI to minute
|
681
914
|
complete['TOI'] = complete['TOI']/60
|
682
915
|
|
683
916
|
#Add per 60 stats
|
684
|
-
for stat in
|
917
|
+
for stat in ['FF','FA','xGF','xGA','GF','GA','CF','CA','GSAx']:
|
685
918
|
complete[f'{stat}/60'] = (complete[stat]/complete['TOI'])*60
|
919
|
+
|
920
|
+
complete['GF%'] = complete['GF']/(complete['GF']+complete['GA'])
|
921
|
+
complete['xGF%'] = complete['xGF']/(complete['xGF']+complete['xGA'])
|
922
|
+
complete['FF%'] = complete['FF']/(complete['FF']+complete['FA'])
|
923
|
+
complete['CF%'] = complete['CF']/(complete['CF']+complete['CA'])
|
924
|
+
|
925
|
+
#Remove entries with no ID listed
|
926
|
+
complete = complete.loc[complete['ID'].notna()]
|
686
927
|
|
687
|
-
#
|
688
|
-
|
689
|
-
|
928
|
+
#Import rosters and player info
|
929
|
+
rosters = pd.read_csv(roster_path)
|
930
|
+
names = rosters[['id','fullName',
|
931
|
+
'headshot','positionCode','shootsCatches',
|
932
|
+
'heightInInches','weightInPounds',
|
933
|
+
'birthDate','birthCountry']].drop_duplicates(subset=['id','fullName'],keep='last')
|
690
934
|
|
691
|
-
#
|
692
|
-
|
693
|
-
|
935
|
+
#Add names
|
936
|
+
complete = pd.merge(complete,names,how='left',left_on='ID',right_on='id')
|
937
|
+
|
938
|
+
#Rename roster columns to their output names
|
939
|
+
complete = complete.rename(columns={'fullName':'Goalie',
|
940
|
+
'headshot':'Headshot',
|
941
|
+
'positionCode':'Position',
|
942
|
+
'shootsCatches':'Handedness',
|
943
|
+
'heightInInches':'Height (in)',
|
944
|
+
'weightInPounds':'Weight (lbs)',
|
945
|
+
'birthDate':'Birthday',
|
946
|
+
'birthCountry':'Nationality'})
|
947
|
+
|
948
|
+
#WSBA
|
949
|
+
complete['WSBA'] = complete['Goalie']+complete['Team']+complete['Season'].astype(str)
|
950
|
+
|
951
|
+
#Add player age
|
952
|
+
complete['Birthday'] = pd.to_datetime(complete['Birthday'])
|
953
|
+
complete['season_year'] = complete['Season'].astype(str).str[4:8].astype(int)
|
954
|
+
complete['Age'] = complete['season_year'] - complete['Birthday'].dt.year
|
955
|
+
|
956
|
+
#Find player headshot
|
957
|
+
complete['Headshot'] = 'https://assets.nhle.com/mugs/nhl/'+complete['Season'].astype(str)+'/'+complete['Team']+'/'+complete['ID'].astype(int).astype(str)+'.png'
|
694
958
|
|
695
959
|
end = time.perf_counter()
|
696
960
|
length = end-start
|
697
961
|
print(f'...finished in {(length if length <60 else length/60):.2f} {'seconds' if length <60 else 'minutes'}.')
|
698
|
-
|
962
|
+
|
963
|
+
head = ['Goalie','ID','Game'] if 'Game' in complete.columns else ['Goalie','ID']
|
964
|
+
complete = complete[head+[
|
965
|
+
"Season","Team",'WSBA',
|
966
|
+
'Headshot','Position','Handedness',
|
967
|
+
'Height (in)','Weight (lbs)',
|
968
|
+
'Birthday','Age','Nationality',
|
969
|
+
'GP','TOI',
|
970
|
+
"GF","FF","xGF","xGF/FF","GF/xGF","FshF%",
|
971
|
+
"GA","FA","xGA","xGA/FA","GA/xGA","FshA%",
|
972
|
+
'CF','CA',
|
973
|
+
'GSAx',
|
974
|
+
'RushF','RushA','RushFxG','RushAxG','RushFG','RushAG'
|
975
|
+
]+[f'{stat}/60' for stat in ['FF','FA','xGF','xGA','GF','GA','CF','CA','GSAx']]]
|
976
|
+
|
977
|
+
#Apply shot impacts if necessary
|
699
978
|
if shot_impact:
|
700
|
-
|
701
|
-
|
702
|
-
|
979
|
+
complete = nhl_shooting_impacts(complete,'goalie')
|
980
|
+
|
981
|
+
end = time.perf_counter()
|
982
|
+
length = end-start
|
983
|
+
print(f'...finished in {(length if length <60 else length/60):.2f} {'seconds' if length <60 else 'minutes'}.')
|
984
|
+
|
985
|
+
return complete
|
986
|
+
|
987
|
+
elif type == 'team':
|
988
|
+
complete = calc_team(pbp,game_strength,second_group)
|
989
|
+
|
990
|
+
#WSBA
|
991
|
+
complete['WSBA'] = complete['Team']+complete['Season'].astype(str)
|
992
|
+
|
993
|
+
#Set TOI to minute
|
994
|
+
complete['TOI'] = complete['TOI']/60
|
995
|
+
|
996
|
+
#Add per 60 stats
|
997
|
+
for stat in per_sixty[10:len(per_sixty)]:
|
998
|
+
complete[f'{stat}/60'] = (complete[stat]/complete['TOI'])*60
|
999
|
+
|
1000
|
+
complete['GF%'] = complete['GF']/(complete['GF']+complete['GA'])
|
1001
|
+
complete['xGF%'] = complete['xGF']/(complete['xGF']+complete['xGA'])
|
1002
|
+
complete['FF%'] = complete['FF']/(complete['FF']+complete['FA'])
|
1003
|
+
complete['CF%'] = complete['CF']/(complete['CF']+complete['CA'])
|
1004
|
+
|
1005
|
+
head = ['Team','Game'] if 'Game' in complete.columns else ['Team']
|
1006
|
+
complete = complete[head+[
|
1007
|
+
'Season','WSBA',
|
1008
|
+
'GP','TOI',
|
1009
|
+
"GF","FF","xGF","xGF/FF","GF/xGF","FshF%",
|
1010
|
+
"GA","FA","xGA","xGA/FA","GA/xGA","FshA%",
|
1011
|
+
'CF','CA',
|
1012
|
+
'GF%','FF%','xGF%','CF%',
|
1013
|
+
'HF','HA','HF%',
|
1014
|
+
'Penl','Penl2','Penl5','PIM','Draw','PENL%',
|
1015
|
+
'Give','Take','PM%',
|
1016
|
+
'Block',
|
1017
|
+
'RushF','RushA','RushFxG','RushAxG','RushFG','RushAG'
|
1018
|
+
]+[f'{stat}/60' for stat in per_sixty[10:len(per_sixty)]]]
|
1019
|
+
#Apply shot impacts if necessary
|
1020
|
+
if shot_impact:
|
1021
|
+
complete = nhl_shooting_impacts(complete,'team')
|
1022
|
+
|
1023
|
+
end = time.perf_counter()
|
1024
|
+
length = end-start
|
1025
|
+
print(f'...finished in {(length if length <60 else length/60):.2f} {'seconds' if length <60 else 'minutes'}.')
|
1026
|
+
|
1027
|
+
return complete
|
703
1028
|
else:
|
704
|
-
indv_stats = calc_indv(pbp)
|
705
|
-
onice_stats = calc_onice(pbp)
|
1029
|
+
indv_stats = calc_indv(pbp,game_strength,second_group)
|
1030
|
+
onice_stats = calc_onice(pbp,game_strength,second_group)
|
706
1031
|
|
707
1032
|
#IDs sometimes set as objects
|
708
1033
|
indv_stats['ID'] = indv_stats['ID'].astype(float)
|
709
1034
|
onice_stats['ID'] = onice_stats['ID'].astype(float)
|
710
1035
|
|
711
1036
|
#Merge and add columns for extra stats
|
712
|
-
complete = pd.merge(indv_stats,onice_stats,how="outer",on=['ID','Team','Season'])
|
1037
|
+
complete = pd.merge(indv_stats,onice_stats,how="outer",on=['ID','Team','Season']+(['Game'] if 'game_id' in second_group else []))
|
713
1038
|
complete['GC%'] = complete['Gi']/complete['GF']
|
714
1039
|
complete['AC%'] = (complete['A1']+complete['A2'])/complete['GF']
|
715
1040
|
complete['GI%'] = (complete['Gi']+complete['A1']+complete['A2'])/complete['GF']
|
716
1041
|
complete['FC%'] = complete['Fi']/complete['FF']
|
717
1042
|
complete['xGC%'] = complete['xGi']/complete['xGF']
|
1043
|
+
complete['GF%'] = complete['GF']/(complete['GF']+complete['GA'])
|
1044
|
+
complete['xGF%'] = complete['xGF']/(complete['xGF']+complete['xGA'])
|
1045
|
+
complete['FF%'] = complete['FF']/(complete['FF']+complete['FA'])
|
1046
|
+
complete['CF%'] = complete['CF']/(complete['CF']+complete['CA'])
|
718
1047
|
|
719
1048
|
#Remove entries with no ID listed
|
720
1049
|
complete = complete.loc[complete['ID'].notna()]
|
@@ -742,18 +1071,6 @@ def nhl_calculate_stats(pbp,type,season_types,game_strength,roster_path="rosters
|
|
742
1071
|
#Set TOI to minute
|
743
1072
|
complete['TOI'] = complete['TOI']/60
|
744
1073
|
|
745
|
-
#Add per 60 stats
|
746
|
-
for stat in per_sixty:
|
747
|
-
complete[f'{stat}/60'] = (complete[stat]/complete['TOI'])*60
|
748
|
-
|
749
|
-
#Rank per 60 stats
|
750
|
-
for stat in per_sixty:
|
751
|
-
complete[f'{stat}/60 Percentile'] = complete[f'{stat}/60'].rank(pct=True)
|
752
|
-
|
753
|
-
#Flip percentiles for against stats
|
754
|
-
for stat in ['FA','xGA','GA']:
|
755
|
-
complete[f'{stat}/60 Percentile'] = 1-complete[f'{stat}/60 Percentile']
|
756
|
-
|
757
1074
|
#Add player age
|
758
1075
|
complete['Birthday'] = pd.to_datetime(complete['Birthday'])
|
759
1076
|
complete['season_year'] = complete['Season'].astype(str).str[4:8].astype(int)
|
@@ -762,42 +1079,56 @@ def nhl_calculate_stats(pbp,type,season_types,game_strength,roster_path="rosters
|
|
762
1079
|
#Find player headshot
|
763
1080
|
complete['Headshot'] = 'https://assets.nhle.com/mugs/nhl/'+complete['Season'].astype(str)+'/'+complete['Team']+'/'+complete['ID'].astype(int).astype(str)+'.png'
|
764
1081
|
|
765
|
-
end = time.perf_counter()
|
766
|
-
length = end-start
|
767
1082
|
#Remove goalies that occasionally appear in a set
|
768
1083
|
complete = complete.loc[complete['Position']!='G']
|
769
1084
|
#Add WSBA ID
|
770
1085
|
complete['WSBA'] = complete['Player']+complete['Season'].astype(str)+complete['Team']
|
771
1086
|
|
1087
|
+
#Add per 60 stats
|
1088
|
+
for stat in per_sixty:
|
1089
|
+
complete[f'{stat}/60'] = (complete[stat]/complete['TOI'])*60
|
1090
|
+
|
772
1091
|
#Shot Type Metrics
|
773
1092
|
type_metrics = []
|
774
1093
|
for type in shot_types:
|
775
1094
|
for stat in per_sixty[:3]:
|
776
1095
|
type_metrics.append(f'{type.capitalize()}{stat}')
|
777
1096
|
|
778
|
-
|
779
|
-
|
1097
|
+
head = ['Player','ID','Game'] if 'Game' in complete.columns else ['Player','ID']
|
1098
|
+
complete = complete[head+[
|
780
1099
|
"Season","Team",'WSBA',
|
781
1100
|
'Headshot','Position','Handedness',
|
782
1101
|
'Height (in)','Weight (lbs)',
|
783
1102
|
'Birthday','Age','Nationality',
|
784
1103
|
'GP','TOI',
|
785
1104
|
"Gi","A1","A2",'P1','P',
|
1105
|
+
'Give','Take','PM%','HF','HA','HF%',
|
786
1106
|
"Fi","xGi",'xGi/Fi',"Gi/xGi","Fshi%",
|
787
1107
|
"GF","FF","xGF","xGF/FF","GF/xGF","FshF%",
|
788
1108
|
"GA","FA","xGA","xGA/FA","GA/xGA","FshA%",
|
1109
|
+
'Ci','CF','CA','CF%',
|
1110
|
+
'FF%','xGF%','GF%',
|
789
1111
|
'Rush',"Rush xG",'Rush G',"GC%","AC%","GI%","FC%","xGC%",
|
790
|
-
|
1112
|
+
'F','FW','FL','F%',
|
1113
|
+
'Penl','Penl2','Penl5',
|
1114
|
+
'Draw','PIM','PENL%',
|
1115
|
+
'Block',
|
1116
|
+
'OZF','NZF','DZF',
|
1117
|
+
'OZF%','NZF%','DZF%',
|
1118
|
+
]+[f'{stat}/60' for stat in per_sixty]+type_metrics].fillna(0).sort_values(['Player','Season','Team','ID'])
|
791
1119
|
|
792
|
-
print(f'...finished in {(length if length <60 else length/60):.2f} {'seconds' if length <60 else 'minutes'}.')
|
793
1120
|
#Apply shot impacts if necessary (Note: skaters with fewer than 150 minutes of TOI are excluded from the impact calculations and added back without impact values)
|
794
1121
|
if shot_impact:
|
795
|
-
|
796
|
-
|
797
|
-
|
1122
|
+
complete = nhl_shooting_impacts(complete,'skater')
|
1123
|
+
|
1124
|
+
end = time.perf_counter()
|
1125
|
+
length = end-start
|
1126
|
+
print(f'...finished in {(length if length <60 else length/60):.2f} {'seconds' if length <60 else 'minutes'}.')
|
798
1127
|
|
799
|
-
|
800
|
-
|
1128
|
+
return complete
|
1129
|
+
|
1130
|
+
def nhl_plot_skaters_shots(pbp,skater_dict,strengths,marker_dict=event_markers,onice = 'indv',title = True,legend=False):
|
1131
|
+
#Returns dict of plots for specified skaters
|
801
1132
|
# param 'pbp' - pbp to plot data
|
802
1133
|
# param 'skater_dict' - skaters to plot shots for (format: {'Patrice Bergeron':['20242025','BOS']})
|
803
1134
|
# param 'strengths' - strengths to include in plotting
|
@@ -809,18 +1140,19 @@ def nhl_plot_skaters_shots(pbp,skater_dict,strengths,marker_dict=event_markers,o
|
|
809
1140
|
|
810
1141
|
print(f'Plotting the following skater shots: {skater_dict}...')
|
811
1142
|
|
812
|
-
#Iterate through
|
813
|
-
skater_plots =
|
1143
|
+
#Iterate through skaters, adding plots to dict
|
1144
|
+
skater_plots = {}
|
814
1145
|
for skater in skater_dict.keys():
|
815
1146
|
skater_info = skater_dict[skater]
|
816
1147
|
title = f'{skater} Fenwick Shots for {skater_info[1]} in {skater_info[0][2:4]}-{skater_info[0][6:8]}' if title else ''
|
817
|
-
|
1148
|
+
#Key is formatted as PLAYERSEASONTEAM (i.e. PATRICE BERGERON20212022BOS)
|
1149
|
+
skater_plots.update({f'{skater}{skater_info[0]}{skater_info[1]}':[plot_skater_shots(pbp,skater,skater_info[0],skater_info[1],strengths,title,marker_dict,onice,legend)]})
|
818
1150
|
|
819
1151
|
#Return: dict of plotted skater shot charts keyed by player, season, and team
|
820
1152
|
return skater_plots
|
821
1153
|
|
822
|
-
def nhl_plot_games(pbp,events,strengths,game_ids='all',marker_dict=event_markers,team_colors={'away':'primary','home':'primary'},legend=False
|
823
|
-
#Returns
|
1154
|
+
def nhl_plot_games(pbp,events,strengths,game_ids='all',marker_dict=event_markers,team_colors={'away':'primary','home':'primary'},legend=False):
|
1155
|
+
#Returns dict of plots for specified games
|
824
1156
|
# param 'pbp' - pbp to plot data
|
825
1157
|
# param 'events' - type of events to plot
|
826
1158
|
# param 'strengths' - strengths to include in plotting
|
@@ -835,8 +1167,10 @@ def nhl_plot_games(pbp,events,strengths,game_ids='all',marker_dict=event_markers
|
|
835
1167
|
|
836
1168
|
print(f'Plotting the following games: {game_ids}...')
|
837
1169
|
|
838
|
-
|
839
|
-
|
1170
|
+
game_plots = {}
|
1171
|
+
#Iterate through games, adding plot to dict
|
1172
|
+
for game in game_ids:
|
1173
|
+
game_plots.update({game:[plot_game_events(pbp,game,events,strengths,marker_dict,team_colors,legend)]})
|
840
1174
|
|
841
1175
|
#Return: dict of plotted game events keyed by game id
|
842
1176
|
return game_plots
|
@@ -872,7 +1206,7 @@ def repo_load_pbp(seasons = []):
|
|
872
1206
|
|
873
1207
|
#Add parquet to total
|
874
1208
|
print(f'Loading play-by-play from the following seasons: {seasons}...')
|
875
|
-
dfs = [pd.read_parquet(f"https://
|
1209
|
+
dfs = [pd.read_parquet(f"https://f005.backblazeb2.com/file/weakside-breakout/pbp/{season}.parquet") for season in seasons]
|
876
1210
|
|
877
1211
|
return pd.concat(dfs)
|
878
1212
|
|
@@ -880,9 +1214,3 @@ def repo_load_seasons():
|
|
880
1214
|
#List of available seasons to scrape
|
881
1215
|
|
882
1216
|
return seasons
|
883
|
-
|
884
|
-
def admin_convert_to_parquet(seasons):
|
885
|
-
for season in seasons:
|
886
|
-
load = pd.read_csv(f'pbp/csv/nhl_pbp_{season}.csv')
|
887
|
-
|
888
|
-
load.to_parquet(f'pbp/parquet/nhl_pbp_{season}.parquet',index=False)
|