wsba-hockey 1.0.3__py3-none-any.whl → 1.0.5__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- wsba_hockey/__init__.py +1 -1
- wsba_hockey/data_pipelines.py +183 -0
- wsba_hockey/evidence/weakside-breakout/node_modules/duckdb/vendor.py +146 -0
- wsba_hockey/evidence/weakside-breakout/node_modules/flatted/python/flatted.py +149 -0
- wsba_hockey/evidence/weakside-breakout/node_modules/flatted/python/test.py +63 -0
- wsba_hockey/evidence/weakside-breakout/node_modules/node-gyp/gyp/gyp_main.py +45 -0
- wsba_hockey/evidence/weakside-breakout/node_modules/node-gyp/gyp/pylib/gyp/MSVSNew.py +367 -0
- wsba_hockey/evidence/weakside-breakout/node_modules/node-gyp/gyp/pylib/gyp/MSVSProject.py +206 -0
- wsba_hockey/evidence/weakside-breakout/node_modules/node-gyp/gyp/pylib/gyp/MSVSSettings.py +1270 -0
- wsba_hockey/evidence/weakside-breakout/node_modules/node-gyp/gyp/pylib/gyp/MSVSSettings_test.py +1547 -0
- wsba_hockey/evidence/weakside-breakout/node_modules/node-gyp/gyp/pylib/gyp/MSVSToolFile.py +59 -0
- wsba_hockey/evidence/weakside-breakout/node_modules/node-gyp/gyp/pylib/gyp/MSVSUserFile.py +153 -0
- wsba_hockey/evidence/weakside-breakout/node_modules/node-gyp/gyp/pylib/gyp/MSVSUtil.py +271 -0
- wsba_hockey/evidence/weakside-breakout/node_modules/node-gyp/gyp/pylib/gyp/MSVSVersion.py +574 -0
- wsba_hockey/evidence/weakside-breakout/node_modules/node-gyp/gyp/pylib/gyp/__init__.py +690 -0
- wsba_hockey/evidence/weakside-breakout/node_modules/node-gyp/gyp/pylib/gyp/common.py +661 -0
- wsba_hockey/evidence/weakside-breakout/node_modules/node-gyp/gyp/pylib/gyp/common_test.py +78 -0
- wsba_hockey/evidence/weakside-breakout/node_modules/node-gyp/gyp/pylib/gyp/easy_xml.py +165 -0
- wsba_hockey/evidence/weakside-breakout/node_modules/node-gyp/gyp/pylib/gyp/easy_xml_test.py +109 -0
- wsba_hockey/evidence/weakside-breakout/node_modules/node-gyp/gyp/pylib/gyp/flock_tool.py +55 -0
- wsba_hockey/evidence/weakside-breakout/node_modules/node-gyp/gyp/pylib/gyp/generator/__init__.py +0 -0
- wsba_hockey/evidence/weakside-breakout/node_modules/node-gyp/gyp/pylib/gyp/generator/analyzer.py +808 -0
- wsba_hockey/evidence/weakside-breakout/node_modules/node-gyp/gyp/pylib/gyp/generator/android.py +1173 -0
- wsba_hockey/evidence/weakside-breakout/node_modules/node-gyp/gyp/pylib/gyp/generator/cmake.py +1321 -0
- wsba_hockey/evidence/weakside-breakout/node_modules/node-gyp/gyp/pylib/gyp/generator/compile_commands_json.py +120 -0
- wsba_hockey/evidence/weakside-breakout/node_modules/node-gyp/gyp/pylib/gyp/generator/dump_dependency_json.py +103 -0
- wsba_hockey/evidence/weakside-breakout/node_modules/node-gyp/gyp/pylib/gyp/generator/eclipse.py +464 -0
- wsba_hockey/evidence/weakside-breakout/node_modules/node-gyp/gyp/pylib/gyp/generator/gypd.py +89 -0
- wsba_hockey/evidence/weakside-breakout/node_modules/node-gyp/gyp/pylib/gyp/generator/gypsh.py +58 -0
- wsba_hockey/evidence/weakside-breakout/node_modules/node-gyp/gyp/pylib/gyp/generator/make.py +2714 -0
- wsba_hockey/evidence/weakside-breakout/node_modules/node-gyp/gyp/pylib/gyp/generator/msvs.py +3981 -0
- wsba_hockey/evidence/weakside-breakout/node_modules/node-gyp/gyp/pylib/gyp/generator/msvs_test.py +44 -0
- wsba_hockey/evidence/weakside-breakout/node_modules/node-gyp/gyp/pylib/gyp/generator/ninja.py +2936 -0
- wsba_hockey/evidence/weakside-breakout/node_modules/node-gyp/gyp/pylib/gyp/generator/ninja_test.py +55 -0
- wsba_hockey/evidence/weakside-breakout/node_modules/node-gyp/gyp/pylib/gyp/generator/xcode.py +1394 -0
- wsba_hockey/evidence/weakside-breakout/node_modules/node-gyp/gyp/pylib/gyp/generator/xcode_test.py +25 -0
- wsba_hockey/evidence/weakside-breakout/node_modules/node-gyp/gyp/pylib/gyp/input.py +3130 -0
- wsba_hockey/evidence/weakside-breakout/node_modules/node-gyp/gyp/pylib/gyp/input_test.py +98 -0
- wsba_hockey/evidence/weakside-breakout/node_modules/node-gyp/gyp/pylib/gyp/mac_tool.py +771 -0
- wsba_hockey/evidence/weakside-breakout/node_modules/node-gyp/gyp/pylib/gyp/msvs_emulation.py +1271 -0
- wsba_hockey/evidence/weakside-breakout/node_modules/node-gyp/gyp/pylib/gyp/ninja_syntax.py +174 -0
- wsba_hockey/evidence/weakside-breakout/node_modules/node-gyp/gyp/pylib/gyp/simple_copy.py +61 -0
- wsba_hockey/evidence/weakside-breakout/node_modules/node-gyp/gyp/pylib/gyp/win_tool.py +374 -0
- wsba_hockey/evidence/weakside-breakout/node_modules/node-gyp/gyp/pylib/gyp/xcode_emulation.py +1939 -0
- wsba_hockey/evidence/weakside-breakout/node_modules/node-gyp/gyp/pylib/gyp/xcode_ninja.py +302 -0
- wsba_hockey/evidence/weakside-breakout/node_modules/node-gyp/gyp/pylib/gyp/xcodeproj_file.py +3197 -0
- wsba_hockey/evidence/weakside-breakout/node_modules/node-gyp/gyp/pylib/gyp/xml_fix.py +65 -0
- wsba_hockey/evidence/weakside-breakout/node_modules/node-gyp/gyp/test_gyp.py +261 -0
- wsba_hockey/evidence/weakside-breakout/node_modules/node-gyp/gyp/tools/graphviz.py +102 -0
- wsba_hockey/evidence/weakside-breakout/node_modules/node-gyp/gyp/tools/pretty_gyp.py +156 -0
- wsba_hockey/evidence/weakside-breakout/node_modules/node-gyp/gyp/tools/pretty_sln.py +181 -0
- wsba_hockey/evidence/weakside-breakout/node_modules/node-gyp/gyp/tools/pretty_vcproj.py +339 -0
- wsba_hockey/evidence/weakside-breakout/node_modules/node-gyp/test/fixtures/test-charmap.py +31 -0
- wsba_hockey/evidence/weakside-breakout/node_modules/node-gyp/update-gyp.py +64 -0
- wsba_hockey/evidence/weakside-breakout/node_modules/sqlite3/node_modules/node-gyp/gyp/gyp_main.py +45 -0
- wsba_hockey/evidence/weakside-breakout/node_modules/sqlite3/node_modules/node-gyp/gyp/pylib/gyp/MSVSNew.py +367 -0
- wsba_hockey/evidence/weakside-breakout/node_modules/sqlite3/node_modules/node-gyp/gyp/pylib/gyp/MSVSProject.py +206 -0
- wsba_hockey/evidence/weakside-breakout/node_modules/sqlite3/node_modules/node-gyp/gyp/pylib/gyp/MSVSSettings.py +1270 -0
- wsba_hockey/evidence/weakside-breakout/node_modules/sqlite3/node_modules/node-gyp/gyp/pylib/gyp/MSVSSettings_test.py +1547 -0
- wsba_hockey/evidence/weakside-breakout/node_modules/sqlite3/node_modules/node-gyp/gyp/pylib/gyp/MSVSToolFile.py +59 -0
- wsba_hockey/evidence/weakside-breakout/node_modules/sqlite3/node_modules/node-gyp/gyp/pylib/gyp/MSVSUserFile.py +153 -0
- wsba_hockey/evidence/weakside-breakout/node_modules/sqlite3/node_modules/node-gyp/gyp/pylib/gyp/MSVSUtil.py +271 -0
- wsba_hockey/evidence/weakside-breakout/node_modules/sqlite3/node_modules/node-gyp/gyp/pylib/gyp/MSVSVersion.py +574 -0
- wsba_hockey/evidence/weakside-breakout/node_modules/sqlite3/node_modules/node-gyp/gyp/pylib/gyp/__init__.py +666 -0
- wsba_hockey/evidence/weakside-breakout/node_modules/sqlite3/node_modules/node-gyp/gyp/pylib/gyp/common.py +654 -0
- wsba_hockey/evidence/weakside-breakout/node_modules/sqlite3/node_modules/node-gyp/gyp/pylib/gyp/common_test.py +78 -0
- wsba_hockey/evidence/weakside-breakout/node_modules/sqlite3/node_modules/node-gyp/gyp/pylib/gyp/easy_xml.py +165 -0
- wsba_hockey/evidence/weakside-breakout/node_modules/sqlite3/node_modules/node-gyp/gyp/pylib/gyp/easy_xml_test.py +109 -0
- wsba_hockey/evidence/weakside-breakout/node_modules/sqlite3/node_modules/node-gyp/gyp/pylib/gyp/flock_tool.py +55 -0
- wsba_hockey/evidence/weakside-breakout/node_modules/sqlite3/node_modules/node-gyp/gyp/pylib/gyp/generator/__init__.py +0 -0
- wsba_hockey/evidence/weakside-breakout/node_modules/sqlite3/node_modules/node-gyp/gyp/pylib/gyp/generator/analyzer.py +808 -0
- wsba_hockey/evidence/weakside-breakout/node_modules/sqlite3/node_modules/node-gyp/gyp/pylib/gyp/generator/android.py +1173 -0
- wsba_hockey/evidence/weakside-breakout/node_modules/sqlite3/node_modules/node-gyp/gyp/pylib/gyp/generator/cmake.py +1321 -0
- wsba_hockey/evidence/weakside-breakout/node_modules/sqlite3/node_modules/node-gyp/gyp/pylib/gyp/generator/compile_commands_json.py +120 -0
- wsba_hockey/evidence/weakside-breakout/node_modules/sqlite3/node_modules/node-gyp/gyp/pylib/gyp/generator/dump_dependency_json.py +103 -0
- wsba_hockey/evidence/weakside-breakout/node_modules/sqlite3/node_modules/node-gyp/gyp/pylib/gyp/generator/eclipse.py +464 -0
- wsba_hockey/evidence/weakside-breakout/node_modules/sqlite3/node_modules/node-gyp/gyp/pylib/gyp/generator/gypd.py +89 -0
- wsba_hockey/evidence/weakside-breakout/node_modules/sqlite3/node_modules/node-gyp/gyp/pylib/gyp/generator/gypsh.py +58 -0
- wsba_hockey/evidence/weakside-breakout/node_modules/sqlite3/node_modules/node-gyp/gyp/pylib/gyp/generator/make.py +2518 -0
- wsba_hockey/evidence/weakside-breakout/node_modules/sqlite3/node_modules/node-gyp/gyp/pylib/gyp/generator/msvs.py +3978 -0
- wsba_hockey/evidence/weakside-breakout/node_modules/sqlite3/node_modules/node-gyp/gyp/pylib/gyp/generator/msvs_test.py +44 -0
- wsba_hockey/evidence/weakside-breakout/node_modules/sqlite3/node_modules/node-gyp/gyp/pylib/gyp/generator/ninja.py +2936 -0
- wsba_hockey/evidence/weakside-breakout/node_modules/sqlite3/node_modules/node-gyp/gyp/pylib/gyp/generator/ninja_test.py +55 -0
- wsba_hockey/evidence/weakside-breakout/node_modules/sqlite3/node_modules/node-gyp/gyp/pylib/gyp/generator/xcode.py +1394 -0
- wsba_hockey/evidence/weakside-breakout/node_modules/sqlite3/node_modules/node-gyp/gyp/pylib/gyp/generator/xcode_test.py +25 -0
- wsba_hockey/evidence/weakside-breakout/node_modules/sqlite3/node_modules/node-gyp/gyp/pylib/gyp/input.py +3137 -0
- wsba_hockey/evidence/weakside-breakout/node_modules/sqlite3/node_modules/node-gyp/gyp/pylib/gyp/input_test.py +98 -0
- wsba_hockey/evidence/weakside-breakout/node_modules/sqlite3/node_modules/node-gyp/gyp/pylib/gyp/mac_tool.py +771 -0
- wsba_hockey/evidence/weakside-breakout/node_modules/sqlite3/node_modules/node-gyp/gyp/pylib/gyp/msvs_emulation.py +1271 -0
- wsba_hockey/evidence/weakside-breakout/node_modules/sqlite3/node_modules/node-gyp/gyp/pylib/gyp/ninja_syntax.py +174 -0
- wsba_hockey/evidence/weakside-breakout/node_modules/sqlite3/node_modules/node-gyp/gyp/pylib/gyp/simple_copy.py +61 -0
- wsba_hockey/evidence/weakside-breakout/node_modules/sqlite3/node_modules/node-gyp/gyp/pylib/gyp/win_tool.py +374 -0
- wsba_hockey/evidence/weakside-breakout/node_modules/sqlite3/node_modules/node-gyp/gyp/pylib/gyp/xcode_emulation.py +1939 -0
- wsba_hockey/evidence/weakside-breakout/node_modules/sqlite3/node_modules/node-gyp/gyp/pylib/gyp/xcode_ninja.py +302 -0
- wsba_hockey/evidence/weakside-breakout/node_modules/sqlite3/node_modules/node-gyp/gyp/pylib/gyp/xcodeproj_file.py +3197 -0
- wsba_hockey/evidence/weakside-breakout/node_modules/sqlite3/node_modules/node-gyp/gyp/pylib/gyp/xml_fix.py +65 -0
- wsba_hockey/evidence/weakside-breakout/node_modules/sqlite3/node_modules/node-gyp/gyp/setup.py +42 -0
- wsba_hockey/evidence/weakside-breakout/node_modules/sqlite3/node_modules/node-gyp/gyp/test_gyp.py +260 -0
- wsba_hockey/evidence/weakside-breakout/node_modules/sqlite3/node_modules/node-gyp/gyp/tools/graphviz.py +102 -0
- wsba_hockey/evidence/weakside-breakout/node_modules/sqlite3/node_modules/node-gyp/gyp/tools/pretty_gyp.py +156 -0
- wsba_hockey/evidence/weakside-breakout/node_modules/sqlite3/node_modules/node-gyp/gyp/tools/pretty_sln.py +181 -0
- wsba_hockey/evidence/weakside-breakout/node_modules/sqlite3/node_modules/node-gyp/gyp/tools/pretty_vcproj.py +339 -0
- wsba_hockey/evidence/weakside-breakout/node_modules/sqlite3/node_modules/node-gyp/test/fixtures/test-charmap.py +31 -0
- wsba_hockey/evidence/weakside-breakout/node_modules/sqlite3/node_modules/node-gyp/update-gyp.py +46 -0
- wsba_hockey/evidence/weakside-breakout/wsba_nhl_apps/wsba_nhl_apps/game_stats/app.py +400 -0
- wsba_hockey/evidence/weakside-breakout/wsba_nhl_apps/wsba_nhl_apps/game_stats/name_fix.py +47 -0
- wsba_hockey/evidence/weakside-breakout/wsba_nhl_apps/wsba_nhl_apps/heatmaps/app.py +108 -0
- wsba_hockey/evidence/weakside-breakout/wsba_nhl_apps/wsba_nhl_apps/heatmaps/plot.py +93 -0
- wsba_hockey/evidence/weakside-breakout/wsba_nhl_apps/wsba_nhl_apps/heatmaps/rink_plot.py +245 -0
- wsba_hockey/evidence/weakside-breakout/wsba_nhl_apps/wsba_nhl_apps/matchups/app.py +145 -0
- wsba_hockey/evidence/weakside-breakout/wsba_nhl_apps/wsba_nhl_apps/matchups/plot.py +77 -0
- wsba_hockey/evidence/weakside-breakout/wsba_nhl_apps/wsba_nhl_apps/matchups/rink_plot.py +245 -0
- wsba_hockey/evidence/weakside-breakout/wsba_nhl_apps/wsba_nhl_apps/pbp/app.py +389 -0
- wsba_hockey/evidence/weakside-breakout/wsba_nhl_apps/wsba_nhl_apps/pbp/plot.py +70 -0
- wsba_hockey/evidence/weakside-breakout/wsba_nhl_apps/wsba_nhl_apps/pbp/rink_plot.py +245 -0
- wsba_hockey/evidence/weakside-breakout/wsba_nhl_apps/wsba_nhl_apps/skater/app.py +110 -0
- wsba_hockey/evidence/weakside-breakout/wsba_nhl_apps/wsba_nhl_apps/skater/plot.py +58 -0
- wsba_hockey/evidence/weakside-breakout/wsba_nhl_apps/wsba_nhl_apps/skater/rink_plot.py +245 -0
- wsba_hockey/tools/agg.py +243 -54
- wsba_hockey/tools/plotting.py +25 -25
- wsba_hockey/tools/scraping.py +154 -263
- wsba_hockey/tools/xg_model.py +369 -315
- wsba_hockey/workspace.py +22 -117
- wsba_hockey/wsba_main.py +499 -167
- {wsba_hockey-1.0.3.dist-info → wsba_hockey-1.0.5.dist-info}/METADATA +1 -1
- wsba_hockey-1.0.5.dist-info/RECORD +135 -0
- {wsba_hockey-1.0.3.dist-info → wsba_hockey-1.0.5.dist-info}/WHEEL +1 -1
- wsba_hockey/stats/calculate_viz/shot_impact.py +0 -2
- wsba_hockey-1.0.3.dist-info/RECORD +0 -19
- {wsba_hockey-1.0.3.dist-info → wsba_hockey-1.0.5.dist-info}/licenses/LICENSE +0 -0
- {wsba_hockey-1.0.3.dist-info → wsba_hockey-1.0.5.dist-info}/top_level.txt +0 -0
wsba_hockey/tools/scraping.py
CHANGED
@@ -1,11 +1,12 @@
|
|
1
1
|
import re
|
2
|
-
|
3
|
-
import
|
4
|
-
import json as json_lib
|
5
|
-
from .utils.shared import *
|
2
|
+
import warnings
|
3
|
+
import os
|
6
4
|
import numpy as np
|
7
5
|
import pandas as pd
|
8
|
-
import
|
6
|
+
import requests as rs
|
7
|
+
import json as json_lib
|
8
|
+
from bs4 import BeautifulSoup
|
9
|
+
from wsba_hockey.tools.utils.shared import *
|
9
10
|
warnings.filterwarnings('ignore')
|
10
11
|
|
11
12
|
### SCRAPING FUNCTIONS ###
|
@@ -27,8 +28,9 @@ def get_col():
|
|
27
28
|
return [
|
28
29
|
'season','season_type','game_id','game_date',"start_time","venue","venue_location",
|
29
30
|
'away_team_abbr','home_team_abbr','event_num','period','period_type',
|
30
|
-
'seconds_elapsed',"
|
31
|
-
"event_type_code","event_type","description","
|
31
|
+
'seconds_elapsed',"strength_state","strength_state_venue","home_team_defending_side",
|
32
|
+
"event_type_code","event_type","description","event_reason",
|
33
|
+
"penalty_type","penalty_duration","penalty_attribution",
|
32
34
|
"event_team_abbr","event_team_venue",
|
33
35
|
'num_on', 'players_on','ids_on','num_off','players_off','ids_off','shift_type',
|
34
36
|
"event_player_1_name","event_player_2_name","event_player_3_name",
|
@@ -38,7 +40,7 @@ def get_col():
|
|
38
40
|
"shot_type","zone_code","x","y","x_fixed","y_fixed","x_adj","y_adj",
|
39
41
|
"event_skaters","away_skaters","home_skaters",
|
40
42
|
"event_distance","event_angle","event_length","seconds_since_last",
|
41
|
-
"away_score","home_score", "away_fenwick", "home_fenwick",
|
43
|
+
"away_score","home_score", "away_fenwick", "home_fenwick",
|
42
44
|
"away_on_1","away_on_2","away_on_3","away_on_4","away_on_5","away_on_6","away_goalie",
|
43
45
|
"home_on_1","home_on_2","home_on_3","home_on_4","home_on_5","home_on_6","home_goalie",
|
44
46
|
"away_on_1_id","away_on_2_id","away_on_3_id","away_on_4_id","away_on_5_id","away_on_6_id","away_goalie_id",
|
@@ -46,6 +48,34 @@ def get_col():
|
|
46
48
|
"event_coach","away_coach","home_coach"
|
47
49
|
]
|
48
50
|
|
51
|
+
def med_x_coord(group):
    """Attach the median shot-attempt x coordinate of ``group`` as ``med_x``.

    The median raw ``x`` of a team's shot attempts (blocked shots, missed
    shots, shots on goal and goals) within a single period is used by the
    caller to decide which direction that team is shooting in that period.

    Parameters
    ----------
    group : pandas.DataFrame
        Play-by-play rows with at least the ``event_type`` and ``x`` columns.
        Intended to be one (venue, period, game) group from ``groupby.apply``.

    Returns
    -------
    pandas.DataFrame
        A new frame equal to ``group`` plus a constant ``med_x`` column.
        ``med_x`` is NaN when the group holds no shot attempt with a
        coordinate.

    Raises
    ------
    KeyError
        If ``event_type`` or ``x`` is missing from ``group``.
    """
    shot_attempts = ['blocked-shot', 'missed-shot', 'shot-on-goal', 'goal']
    is_attempt = group['event_type'].isin(shot_attempts)
    med_x = group.loc[is_attempt, 'x'].median(skipna=True)

    # Build a new frame rather than writing into ``group``: pandas does not
    # support functions that mutate the object handed to ``groupby.apply``.
    return group.assign(med_x=med_x)
|
57
|
+
|
58
|
+
def adjust_coords(pbp):
    """Return play-by-play data with orientation-normalised coordinates.

    Teams switch ends between periods, so raw ``x``/``y`` values are rotated
    where needed so that the home team always attacks the right-hand net
    (x = 89) and the away team always attacks the left-hand net (x = -89).

    Parameters
    ----------
    pbp : pandas.DataFrame
        JSON or ESPN play-by-play rows containing ``event_team_venue``,
        ``period``, ``game_id``, ``event_type``, ``x`` and ``y``.

    Returns
    -------
    pandas.DataFrame
        A new frame (the input is not modified) with a fresh 0..n-1 index and
        these added columns:

        * ``med_x`` - median raw ``x`` of the event team's shot attempts in
          that period of that game (NaN when there are none).
        * ``x_adj``, ``y_adj`` - coordinates after the orientation fix.
        * ``event_distance`` - distance from the event to the net the event
          team is attacking.
        * ``event_angle`` - angle in degrees (0-180) between the event and
          that same net.

    Raises
    ------
    KeyError
        If a required column is missing. Callers use this to detect games
        published without coordinates.
    """
    group_keys = ['event_team_venue', 'period', 'game_id']
    shot_attempts = ['blocked-shot', 'missed-shot', 'shot-on-goal', 'goal']

    # Work on a copy so the caller's frame is left untouched.
    pbp = pbp.copy().reset_index(drop=True)

    is_home = pbp['event_team_venue'] == 'home'
    is_away = pbp['event_team_venue'] == 'away'

    # Median x of each team's shot attempts per period tells us which way the
    # team is shooting. A grouped transform is used instead of
    # ``groupby.apply`` so the grouping columns are never dropped from the
    # frame, whatever the pandas version.
    attempt_x = pbp['x'].where(pbp['event_type'].isin(shot_attempts))
    pbp['med_x'] = attempt_x.groupby([pbp[key] for key in group_keys]).transform('median')

    # Rotate by 180 degrees whenever a team is shooting the "wrong" way.
    # A NaN median compares False on both sides, so such rows stay as-is.
    flip = (is_home & (pbp['med_x'] < 0)) | (is_away & (pbp['med_x'] > 0))
    pbp['x_adj'] = np.where(flip, -pbp['x'], pbp['x'])
    # Mirror y with the same mask as x. Inferring the flip from
    # ``x == x_adj`` misses events that sit exactly on x == 0.
    pbp['y_adj'] = np.where(flip, -pbp['y'], pbp['y'])

    # Distance and angle are both measured to the net the event team attacks:
    # +89 for the home side, -89 otherwise.
    net_x = np.where(is_home, 89, -89)
    dx = net_x - pbp['x_adj']
    pbp['event_distance'] = np.sqrt((dx ** 2) + (pbp['y_adj'] ** 2))
    pbp['event_angle'] = np.degrees(np.arctan2(pbp['y_adj'].abs(), dx.abs()))

    #Return: pbp with adjusted coordinates
    return pbp
|
49
79
|
|
50
80
|
## JSON FUNCTIONS ##
|
51
81
|
def get_game_roster(json):
|
@@ -183,31 +213,6 @@ def parse_json(info):
|
|
183
213
|
|
184
214
|
events['event_team_venue'] = np.where(events['details.eventOwnerTeamId']==info['home_team_id'],"home","away")
|
185
215
|
|
186
|
-
#Coordinate adjustments:
|
187
|
-
#The WSBA NHL Scraper includes three sets of coordinates per event:
|
188
|
-
# x, y - Raw coordinates from JSON pbpp
|
189
|
-
# x_fixed, y_fixed - Coordinates fixed to the right side of the ice (x is always greater than 0)
|
190
|
-
# x_adj, y_adj - Adjusted coordinates configuring away events with negative x vlaues while home events are always positive
|
191
|
-
|
192
|
-
#Some games (mostly preseason and all star games) do not include coordinates.
|
193
|
-
try:
|
194
|
-
events['x_fixed'] = abs(events['details.xCoord'])
|
195
|
-
events['y_fixed'] = np.where(events['details.xCoord']<0,-events['details.yCoord'],events['details.yCoord'])
|
196
|
-
events['x_adj'] = np.where(events['event_team_venue']=="home",events['x_fixed'],-events['x_fixed'])
|
197
|
-
events['y_adj'] = np.where(events['event_team_venue']=="home",events['y_fixed'],-events['y_fixed'])
|
198
|
-
events['event_distance'] = np.sqrt(((89 - events['x_fixed'])**2) + (events['y_fixed']**2))
|
199
|
-
events['event_angle'] = np.degrees(np.arctan2(abs(events['y_fixed']), abs(89 - events['x_fixed'])))
|
200
|
-
except TypeError:
|
201
|
-
print(f"No coordinates found for game {info['id'][0]}...")
|
202
|
-
|
203
|
-
events['x_fixed'] = np.nan
|
204
|
-
events['y_fixed'] = np.nan
|
205
|
-
events['x_adj'] = np.nan
|
206
|
-
events['y_adj'] = np.nan
|
207
|
-
events['event_distance'] = np.nan
|
208
|
-
events['event_angle'] = np.nan
|
209
|
-
|
210
|
-
|
211
216
|
events['event_team_abbr'] = events['details.eventOwnerTeamId'].replace({
|
212
217
|
info['away_team_id']:[info['away_team_abbr']],
|
213
218
|
info['home_team_id']:[info['home_team_abbr']]
|
@@ -226,223 +231,43 @@ def parse_json(info):
|
|
226
231
|
"typeDescKey":"event_type",
|
227
232
|
"details.shotType":"shot_type",
|
228
233
|
"details.duration":"penalty_duration",
|
229
|
-
"details.descKey":"
|
230
|
-
"details.
|
234
|
+
"details.descKey":"penalty_type",
|
235
|
+
"details.typeCode":'penalty_attribution',
|
236
|
+
"details.reason":"event_reason",
|
231
237
|
"details.zoneCode":"zone_code",
|
232
238
|
"details.xCoord":"x",
|
233
239
|
"details.yCoord":"y",
|
234
240
|
"details.goalieInNetId": "event_goalie_id",
|
235
|
-
"details.awaySOG":"
|
236
|
-
"details.homeSOG":"
|
241
|
+
"details.awaySOG":"away_sog",
|
242
|
+
"details.homeSOG":"home_sog"
|
237
243
|
})
|
238
244
|
|
245
|
+
#Coordinate adjustments:
|
246
|
+
# x, y - Raw coordinates from JSON pbp
|
247
|
+
# x_adj, y_adj - Adjusted coordinates configuring the away offensive zone to the left and the home offensive zone to the right
|
248
|
+
#Some games (mostly preseason and all star games) do not include coordinates.
|
249
|
+
|
250
|
+
try:
|
251
|
+
events = adjust_coords(events)
|
252
|
+
|
253
|
+
except KeyError:
|
254
|
+
print(f"No coordinates found for game {info['game_id'][0]}...")
|
255
|
+
|
256
|
+
events['x_adj'] = np.nan
|
257
|
+
events['y_adj'] = np.nan
|
258
|
+
events['event_distance'] = np.nan
|
259
|
+
events['event_angle'] = np.nan
|
260
|
+
|
239
261
|
#Period time adjustments (only 'seconds_elapsed' is included in the resulting data)
|
240
262
|
events['period_seconds_elapsed'] = events['period_time_elasped'].apply(convert_to_seconds)
|
241
263
|
events['seconds_elapsed'] = ((events['period']-1)*1200)+events['period_seconds_elapsed']
|
242
264
|
|
243
265
|
events = events.loc[(events['event_type']!="")]
|
244
|
-
|
245
|
-
#Assign score and fenwick for each event
|
246
|
-
fenwick_events = ['missed-shot','shot-on-goal','goal']
|
247
|
-
ag = 0
|
248
|
-
ags = []
|
249
|
-
hg = 0
|
250
|
-
hgs = []
|
251
|
-
|
252
|
-
af = 0
|
253
|
-
afs = []
|
254
|
-
hf = 0
|
255
|
-
hfs = []
|
256
|
-
for event,team in zip(list(events['event_type']),list(events['event_team_venue'])):
|
257
|
-
if event in fenwick_events:
|
258
|
-
if team == "home":
|
259
|
-
hf += 1
|
260
|
-
if event == 'goal':
|
261
|
-
hg += 1
|
262
|
-
else:
|
263
|
-
af += 1
|
264
|
-
if event == 'goal':
|
265
|
-
ag += 1
|
266
|
-
|
267
|
-
ags.append(ag)
|
268
|
-
hgs.append(hg)
|
269
|
-
afs.append(af)
|
270
|
-
hfs.append(hf)
|
271
|
-
|
272
|
-
events['away_score'] = ags
|
273
|
-
events['home_score'] = hgs
|
274
|
-
events['away_fenwick'] = afs
|
275
|
-
events['home_fenwick'] = hfs
|
276
266
|
|
277
267
|
#Return: dataframe with parsed game
|
278
268
|
return events
|
279
269
|
|
280
|
-
### ESPN SCRAPING FUNCTIONS ###
|
281
|
-
def espn_game_id(date,away,home):
|
282
|
-
#Given a date formatted as YYYY-MM-DD and teams, return game id from ESPN schedule
|
283
|
-
date = date.replace("-","")
|
284
|
-
|
285
|
-
#Retreive data
|
286
|
-
api = f"https://site.api.espn.com/apis/site/v2/sports/hockey/nhl/scoreboard?dates={date}"
|
287
|
-
schedule = pd.json_normalize(rs.get(api).json()['events'])
|
288
|
-
|
289
|
-
#Create team abbreviation columns
|
290
|
-
schedule['away_team_abbr'] = schedule['shortName'].str[:3].str.strip(" ")
|
291
|
-
schedule['home_team_abbr'] = schedule['shortName'].str[-3:].str.strip(" ")
|
292
|
-
|
293
|
-
#Modify team abbreviations as necessary
|
294
|
-
schedule = schedule.replace({
|
295
|
-
"LA":"LAK",
|
296
|
-
"NJ":"NJD",
|
297
|
-
"SJ":"SJS",
|
298
|
-
"TB":"TBL",
|
299
|
-
})
|
300
|
-
|
301
|
-
#Retreive game id
|
302
|
-
game_id = schedule.loc[(schedule['away_team_abbr']==away)&
|
303
|
-
(schedule['home_team_abbr']==home),'id'].tolist()[0]
|
304
|
-
|
305
|
-
#Return: ESPN game id
|
306
|
-
return game_id
|
307
270
|
|
308
|
-
def parse_espn(date,away,home):
|
309
|
-
#Given a date formatted as YYYY-MM-DD and teams, return game events
|
310
|
-
game_id = espn_game_id(date,away,home)
|
311
|
-
url = f'https://www.espn.com/nhl/playbyplay/_/gameId/{game_id}'
|
312
|
-
|
313
|
-
#Code modified from Patrick Bacon
|
314
|
-
|
315
|
-
#Retreive game events as json
|
316
|
-
page = rs.get(url, headers={'User-Agent': 'Mozilla/5.0'}, timeout = 500)
|
317
|
-
soup = BeautifulSoup(page.content.decode('ISO-8859-1'), 'lxml', multi_valued_attributes = None)
|
318
|
-
json = json_lib.loads(str(soup).split('"playGrps":')[1].split(',"tms"')[0])
|
319
|
-
|
320
|
-
#DataFrame of time-related info for events
|
321
|
-
clock_df = pd.DataFrame()
|
322
|
-
|
323
|
-
for period in range(0, len(json)):
|
324
|
-
clock_df = clock_df._append(pd.DataFrame(json[period]))
|
325
|
-
|
326
|
-
clock_df = clock_df[~pd.isna(clock_df.clock)]
|
327
|
-
|
328
|
-
# Needed to add .split(',"st":3')[0] for playoffs
|
329
|
-
|
330
|
-
#DataFrame of coordinates for events
|
331
|
-
coords_df = pd.DataFrame(json_lib.loads(str(soup).split('plays":')[1].split(',"st":1')[0].split(',"st":2')[0].split(',"st":3')[0]))
|
332
|
-
|
333
|
-
clock_df = clock_df.assign(
|
334
|
-
clock = clock_df.clock.apply(lambda x: x['displayValue'])
|
335
|
-
)
|
336
|
-
|
337
|
-
coords_df = coords_df.assign(
|
338
|
-
coords_x = coords_df[~pd.isna(coords_df.coordinate)].coordinate.apply(lambda x: x['x']).astype(int),
|
339
|
-
coords_y = coords_df[~pd.isna(coords_df.coordinate)].coordinate.apply(lambda y: y['y']).astype(int),
|
340
|
-
)
|
341
|
-
|
342
|
-
#Combine
|
343
|
-
espn_events = coords_df.merge(clock_df.loc[:, ['id', 'clock']])
|
344
|
-
|
345
|
-
espn_events = espn_events.assign(
|
346
|
-
period = espn_events['period'].apply(lambda x: x['number']),
|
347
|
-
minutes = espn_events['clock'].str.split(':').apply(lambda x: x[0]).astype(int),
|
348
|
-
seconds = espn_events['clock'].str.split(':').apply(lambda x: x[1]).astype(int),
|
349
|
-
event_type = espn_events['type'].apply(lambda x: x['txt'])
|
350
|
-
)
|
351
|
-
|
352
|
-
espn_events = espn_events.assign(coords_x = np.where((pd.isna(espn_events.coords_x)) & (pd.isna(espn_events.coords_y)) &
|
353
|
-
(espn_events.event_type=='Face Off'), 0, espn_events.coords_x
|
354
|
-
),
|
355
|
-
coords_y = np.where((pd.isna(espn_events.coords_x)) & (pd.isna(espn_events.coords_y)) &
|
356
|
-
(espn_events.event_type=='Face Off'), 0, espn_events.coords_y))
|
357
|
-
|
358
|
-
espn_events = espn_events[(~pd.isna(espn_events.coords_x)) & (~pd.isna(espn_events.coords_y))]
|
359
|
-
|
360
|
-
espn_events = espn_events.assign(
|
361
|
-
coords_x = espn_events.coords_x.astype(int),
|
362
|
-
coords_y = espn_events.coords_y.astype(int)
|
363
|
-
)
|
364
|
-
|
365
|
-
#Rename events
|
366
|
-
#The turnover event includes just one player in the event information, meaning takeaways will have no coordinates for play-by-plays created by ESPN scraping
|
367
|
-
espn_events['event_type'] = espn_events['event_type'].replace({
|
368
|
-
"Face Off":'faceoff',
|
369
|
-
"Hit":'hit',
|
370
|
-
"Shot":'shot-on-goal',
|
371
|
-
"Missed":'missed-shot',
|
372
|
-
"Blocked":'blocked-shot',
|
373
|
-
"Goal":'goal',
|
374
|
-
"Turnover":'giveaway',
|
375
|
-
"Delayed Penalty":'delayed-penalty',
|
376
|
-
"Penalty":'penalty',
|
377
|
-
})
|
378
|
-
|
379
|
-
#Period time adjustments (only 'seconds_elapsed' is included in the resulting data)
|
380
|
-
espn_events['period_time_simple'] = espn_events['clock'].str.replace(":","",regex=True)
|
381
|
-
espn_events['period_seconds_elapsed'] = np.where(espn_events['period_time_simple'].str.len()==3,
|
382
|
-
((espn_events['period_time_simple'].str[0].astype(int)*60)+espn_events['period_time_simple'].str[-2:].astype(int)),
|
383
|
-
((espn_events['period_time_simple'].str[0:2].astype(int)*60)+espn_events['period_time_simple'].str[-2:].astype(int)))
|
384
|
-
espn_events['seconds_elapsed'] = ((espn_events['period']-1)*1200)+espn_events['period_seconds_elapsed']
|
385
|
-
|
386
|
-
espn_events = espn_events.rename(columns = {'text':'description'})
|
387
|
-
|
388
|
-
#Add event team
|
389
|
-
espn_events['event_team_abbr'] = espn_events['homeAway'].replace({
|
390
|
-
"away":away,
|
391
|
-
"home":home
|
392
|
-
})
|
393
|
-
|
394
|
-
#Some games (mostly preseason and all star games) do not include coordinates.
|
395
|
-
try:
|
396
|
-
espn_events['x_fixed'] = abs(espn_events['coords_x'])
|
397
|
-
espn_events['y_fixed'] = np.where(espn_events['coords_x']<0,-espn_events['coords_y'],espn_events['coords_y'])
|
398
|
-
espn_events['x_adj'] = np.where(espn_events['homeAway']=="home",espn_events['x_fixed'],-espn_events['x_fixed'])
|
399
|
-
espn_events['y_adj'] = np.where(espn_events['homeAway']=="home",espn_events['y_fixed'],-espn_events['y_fixed'])
|
400
|
-
espn_events['event_distance'] = np.sqrt(((89 - espn_events['x_fixed'])**2) + (espn_events['y_fixed']**2))
|
401
|
-
espn_events['event_angle'] = np.degrees(np.arctan2(abs(espn_events['y_fixed']), abs(89 - espn_events['x_fixed'])))
|
402
|
-
except TypeError:
|
403
|
-
print(f"No coordinates found for ESPN game...")
|
404
|
-
|
405
|
-
espn_events['x_fixed'] = np.nan
|
406
|
-
espn_events['y_fixed'] = np.nan
|
407
|
-
espn_events['x_adj'] = np.nan
|
408
|
-
espn_events['y_adj'] = np.nan
|
409
|
-
espn_events['event_distance'] = np.nan
|
410
|
-
espn_events['event_angle'] = np.nan
|
411
|
-
|
412
|
-
#Assign score and fenwick for each event
|
413
|
-
fenwick_events = ['missed-shot','shot-on-goal','goal']
|
414
|
-
ag = 0
|
415
|
-
ags = []
|
416
|
-
hg = 0
|
417
|
-
hgs = []
|
418
|
-
|
419
|
-
af = 0
|
420
|
-
afs = []
|
421
|
-
hf = 0
|
422
|
-
hfs = []
|
423
|
-
for event,team in zip(list(espn_events['event_type']),list(espn_events['homeAway'])):
|
424
|
-
if event in fenwick_events:
|
425
|
-
if team == "home":
|
426
|
-
hf += 1
|
427
|
-
if event == 'goal':
|
428
|
-
hg += 1
|
429
|
-
else:
|
430
|
-
af += 1
|
431
|
-
if event == 'goal':
|
432
|
-
ag += 1
|
433
|
-
|
434
|
-
ags.append(ag)
|
435
|
-
hgs.append(hg)
|
436
|
-
afs.append(af)
|
437
|
-
hfs.append(hf)
|
438
|
-
|
439
|
-
espn_events['away_score'] = ags
|
440
|
-
espn_events['home_score'] = hgs
|
441
|
-
espn_events['away_fenwick'] = afs
|
442
|
-
espn_events['home_fenwick'] = hfs
|
443
|
-
#Return: play-by-play events in supplied game from ESPN
|
444
|
-
return espn_events
|
445
|
-
|
446
271
|
## HTML PBP FUNCTIONS ##
|
447
272
|
def strip_html_pbp(td,rosters):
|
448
273
|
#Given html row, parse data from HTML pbp
|
@@ -529,7 +354,7 @@ def parse_html(info):
|
|
529
354
|
event_log = []
|
530
355
|
for event in events:
|
531
356
|
events_dict = {}
|
532
|
-
if event[0] == "#" or event[4] in ['GOFF', 'EGT', 'PGSTR', 'PGEND', 'ANTHEM','SPC','PBOX','
|
357
|
+
if event[0] == "#" or event[4] in ['GOFF', 'EGT', 'PGSTR', 'PGEND', 'ANTHEM', 'SPC', 'PBOX', 'EISTR', 'EIEND','EGPID'] or event[3]=='-16:0-':
|
533
358
|
continue
|
534
359
|
else:
|
535
360
|
#Event info
|
@@ -660,10 +485,15 @@ def parse_html(info):
|
|
660
485
|
status = teams[team]
|
661
486
|
data = rosters[status[0]]
|
662
487
|
|
663
|
-
|
664
|
-
|
665
|
-
|
488
|
+
#In rare instances the event player is not on the event team (i.e. "WSH TAKEAWAY - #71 CIRELLI, Off. Zone" when #71 CIRELLI is on TBL)
|
489
|
+
try:
|
490
|
+
events_dict[f'event_player_{i+1}_name'] = data[str(num)][2]
|
491
|
+
events_dict[f'event_player_{i+1}_id'] = data[str(num)][4]
|
492
|
+
events_dict[f'event_player_{i+1}_pos'] = data[str(num)][1]
|
493
|
+
except:
|
494
|
+
''
|
666
495
|
|
496
|
+
#Event skaters and strength-state information
|
667
497
|
events_dict['away_skaters'] = away_skaters
|
668
498
|
events_dict['home_skaters'] = home_skaters
|
669
499
|
events_dict['away_goalie_in'] = away_goalie
|
@@ -683,7 +513,7 @@ def parse_html(info):
|
|
683
513
|
'GSTR':"game-start",
|
684
514
|
"ANTHEM":"anthem",
|
685
515
|
"PSTR":"period-start",
|
686
|
-
|
516
|
+
"FAC":"faceoff",
|
687
517
|
"SHOT":"shot-on-goal",
|
688
518
|
"BLOCK":"blocked-shot",
|
689
519
|
"STOP":"stoppage",
|
@@ -695,14 +525,28 @@ def parse_html(info):
|
|
695
525
|
"DELPEN":"delayed-penalty",
|
696
526
|
"PENL":"penalty",
|
697
527
|
"CHL":"challenge",
|
528
|
+
"SOC":'shootout-complete',
|
698
529
|
"PEND":"period-end",
|
699
530
|
"GEND":"game-end"
|
700
531
|
})
|
701
|
-
|
532
|
+
|
702
533
|
#Return: parsed HTML pbp
|
703
534
|
return data
|
704
535
|
|
705
|
-
def
|
536
|
+
def assign_target(data):
    """Number penalties and shot attempts to help merge the JSON and HTML feeds.

    Rows are sorted by period, time, event type, team and the first two event
    player ids. Each penalty, blocked shot, missed shot, shot on goal or goal
    then receives its running position among such events as ``target_num``;
    every other event gets 0. The number distinguishes events that occur in
    the same second. It may not follow the event-number order of either
    document, especially in earlier seasons where events appear out of order.

    Returns the sorted frame after ``reset_index()``, i.e. with a fresh
    0..n-1 index and the previous index kept as a column.
    """
    sort_keys = [
        'period', 'seconds_elapsed', 'event_type',
        'event_team_abbr', 'event_player_1_id', 'event_player_2_id',
    ]
    target_events = ['penalty', 'blocked-shot', 'missed-shot', 'shot-on-goal', 'goal']

    ordered = data.sort_values(sort_keys)

    # Running count over the targeted events only; zero for everything else.
    is_target = ordered['event_type'].isin(target_events)
    ordered['target_num'] = np.where(is_target, is_target.cumsum(), 0)

    return ordered.reset_index()
|
548
|
+
|
549
|
+
def combine_pbp(info,sources):
|
706
550
|
#Given game info, return complete play-by-play data for provided game
|
707
551
|
|
708
552
|
html_pbp = parse_html(info)
|
@@ -710,23 +554,49 @@ def combine_pbp(info):
|
|
710
554
|
#Route data combining - json if season is after 2009-2010:
|
711
555
|
if str(info['season']) in ['20052006','20062007','20072008','20082009','20092010']:
|
712
556
|
#ESPN x HTML
|
713
|
-
espn_pbp = parse_espn(str(info['game_date']),info['away_team_abbr'],info['home_team_abbr']).rename(columns={'coords_x':'x',"coords_y":'y'})
|
714
|
-
merge_col = ['period','seconds_elapsed','event_type','event_team_abbr']
|
557
|
+
#espn_pbp = parse_espn(str(info['game_date']),info['away_team_abbr'],info['home_team_abbr']).rename(columns={'coords_x':'x',"coords_y":'y'}).sort_values(['period','seconds_elapsed']).reset_index()
|
558
|
+
#merge_col = ['period','seconds_elapsed','event_type','event_team_abbr']
|
715
559
|
|
716
|
-
|
560
|
+
#Merge pbp
|
561
|
+
#df = pd.merge(html_pbp,espn_pbp,how='left',on=merge_col)
|
562
|
+
print('In-repair, please try again later...')
|
717
563
|
|
718
564
|
else:
|
719
565
|
#JSON x HTML
|
720
566
|
json_pbp = parse_json(info)
|
721
|
-
#Modify merge conditions and merge pbps
|
722
|
-
merge_col = ['period','seconds_elapsed','event_type','event_team_abbr','event_player_1_id']
|
723
|
-
html_pbp = html_pbp.drop(columns=['event_player_2_id','event_player_3_id','shot_type','zone_code'],errors='ignore')
|
724
567
|
|
725
|
-
|
726
|
-
|
727
|
-
|
568
|
+
if sources:
|
569
|
+
dirs_html = f'sources/{info['season']}/HTML/'
|
570
|
+
dirs_json = f'sources/{info['season']}/JSON/'
|
571
|
+
|
572
|
+
if not os.path.exists(dirs_html):
|
573
|
+
os.makedirs(dirs_html)
|
574
|
+
if not os.path.exists(dirs_json):
|
575
|
+
os.makedirs(dirs_json)
|
576
|
+
|
577
|
+
html_pbp.to_csv(f'{dirs_html}{info['game_id']}_HTML.csv',index=False)
|
578
|
+
json_pbp.to_csv(f'{dirs_json}{info['game_id']}_JSON.csv',index=False)
|
579
|
+
|
580
|
+
#Assign target numbers
|
581
|
+
html_pbp = assign_target(html_pbp)
|
582
|
+
json_pbp = assign_target(json_pbp)
|
583
|
+
|
584
|
+
#Merge on index if the df lengths are the same and the events are in the same general order; merge on columns otherwise
|
585
|
+
if (len(html_pbp) == len(json_pbp)) and (html_pbp['event_type'].equals(json_pbp['event_type'])) and (html_pbp['seconds_elapsed'].equals(json_pbp['seconds_elapsed'])):
|
586
|
+
html_pbp = html_pbp.drop(columns=['period','seconds_elapsed','event_type','event_team_abbr','event_player_1_id','event_player_2_id','event_player_3_id','shot_type','zone_code'],errors='ignore').reset_index()
|
587
|
+
df = pd.merge(html_pbp,json_pbp,how='left',left_index=True,right_index=True).sort_values(['event_num'])
|
588
|
+
else:
|
589
|
+
print(f' merging on columns...',end="")
|
590
|
+
#Modify merge conditions and merge pbps
|
591
|
+
merge_col = ['period','seconds_elapsed','event_type','event_team_abbr','event_player_1_id','target_num']
|
592
|
+
html_pbp = html_pbp.drop(columns=['event_player_2_id','event_player_3_id','shot_type','zone_code'],errors='ignore')
|
593
|
+
|
594
|
+
#While rare sometimes column 'event_player_1_id' is interpreted differently between the two dataframes.
|
595
|
+
html_pbp['event_player_1_id'] = html_pbp['event_player_1_id'].astype(object)
|
596
|
+
json_pbp['event_player_1_id'] = json_pbp['event_player_1_id'].astype(object)
|
728
597
|
|
729
|
-
|
598
|
+
#Merge pbp
|
599
|
+
df = pd.merge(html_pbp,json_pbp,how='left',on=merge_col).sort_values(['event_num'])
|
730
600
|
|
731
601
|
#Add game info
|
732
602
|
info_col = ['season','season_type','game_id','game_date',"venue","venue_location",
|
@@ -745,7 +615,7 @@ def combine_pbp(info):
|
|
745
615
|
df['event_team_venue'] = np.where(df['event_team_abbr'].isna(),"",np.where(df['home_team_abbr']==df['event_team_abbr'],"home","away"))
|
746
616
|
|
747
617
|
#Correct strength state for penalty shots and shootouts - most games don't have shifts in the shootout and are excluded otherwise
|
748
|
-
df['strength_state'] = np.where(
|
618
|
+
df['strength_state'] = np.where((df['period'].astype(str)=='5')&(df['event_type'].isin(['missed-shot','shot-on-goal','goal']))&(df['season_type']==2),"1v0",df['strength_state'])
|
749
619
|
df['strength_state'] = np.where(df['description'].str.contains('Penalty Shot',case=False),"1v0",df['strength_state'])
|
750
620
|
|
751
621
|
col = [col for col in get_col() if col in df.columns.to_list()]
|
@@ -977,7 +847,7 @@ def parse_shift_events(info,home):
|
|
977
847
|
return pd.merge(shifts,on_players,how="outer",on=['row']).replace(np.nan,"")
|
978
848
|
|
979
849
|
## FINALIZE PBP FUNCTIONS ##
|
980
|
-
def combine_shifts(info):
|
850
|
+
def combine_shifts(info,sources):
|
981
851
|
#Given game info, return complete shift events
|
982
852
|
|
983
853
|
#JSON Prep
|
@@ -1027,21 +897,35 @@ def combine_shifts(info):
|
|
1027
897
|
data['home_skaters'] = data[home_on].replace(r'^\s*$', np.nan, regex=True).notna().sum(axis=1)
|
1028
898
|
data['strength_state'] = np.where(data['event_team_abbr']==data['away_team_abbr'],data['away_skaters'].astype(str)+"v"+data['home_skaters'].astype(str),data['home_skaters'].astype(str)+"v"+data['away_skaters'].astype(str))
|
1029
899
|
|
1030
|
-
#
|
900
|
+
#Create final shifts df
|
1031
901
|
col = [col for col in get_col() if col in data.columns.to_list()]
|
1032
|
-
|
902
|
+
full_shifts = data[col]
|
903
|
+
|
904
|
+
#Export sources if true
|
905
|
+
if sources:
|
906
|
+
dirs = f'sources/{info['season']}/SHIFTS/'
|
907
|
+
|
908
|
+
if not os.path.exists(dirs):
|
909
|
+
os.makedirs(dirs)
|
910
|
+
|
911
|
+
full_shifts.to_csv(f'{dirs}{info['game_id']}_SHIFTS.csv',index=False)
|
912
|
+
|
913
|
+
#Return: full shifts data converted to play-by-play format
|
914
|
+
return full_shifts
|
1033
915
|
|
1034
|
-
def combine_data(info):
|
916
|
+
def combine_data(info,sources):
|
1035
917
|
#Given game info, return complete play-by-play data
|
1036
918
|
|
1037
919
|
game_id = info['game_id']
|
1038
920
|
|
1039
|
-
pbp = combine_pbp(info)
|
1040
|
-
shifts = combine_shifts(info)
|
921
|
+
pbp = combine_pbp(info,sources)
|
922
|
+
shifts = combine_shifts(info,sources)
|
1041
923
|
|
1042
924
|
#Combine data
|
1043
925
|
df = pd.concat([pbp,shifts])
|
1044
926
|
|
927
|
+
df['event_num'] = df['event_num'].replace(np.nan,0)
|
928
|
+
|
1045
929
|
#Create priority columns designed to order events that occur at the same time in a game
|
1046
930
|
even_pri = ['takeaway','giveaway','missed-shot','hit','shot-on-goal','blocked-shot']
|
1047
931
|
df['priority'] = np.where(df['event_type'].isin(even_pri),1,
|
@@ -1054,9 +938,9 @@ def combine_data(info):
|
|
1054
938
|
np.where(df['event_type']=='game-end',8,
|
1055
939
|
np.where(df['event_type']=='period-start',9,
|
1056
940
|
np.where(df['event_type']=='faceoff',10,0))))))))))
|
1057
|
-
|
941
|
+
|
1058
942
|
df[['period','seconds_elapsed']] = df[['period','seconds_elapsed']].astype(int)
|
1059
|
-
df = df.sort_values(['period','seconds_elapsed','priority'])
|
943
|
+
df = df.sort_values(['period','seconds_elapsed','event_num','priority'])
|
1060
944
|
|
1061
945
|
#Recalibrate event_num column to accurately depict the order of all events, including changes
|
1062
946
|
df.reset_index(inplace=True,drop=True)
|
@@ -1093,13 +977,20 @@ def combine_data(info):
|
|
1093
977
|
df['away_coach'] = coaches['away']
|
1094
978
|
df['home_coach'] = coaches['home']
|
1095
979
|
df['event_coach'] = np.where(df['event_team_abbr']==df['home_team_abbr'],coaches['home'],np.where(df['event_team_abbr']==df['away_team_abbr'],coaches['away'],""))
|
1096
|
-
|
980
|
+
|
981
|
+
#Assign score, corsi, fenwick, and penalties for each event
|
982
|
+
for venue in ['away','home']:
|
983
|
+
df[f'{venue}_score'] = ((df['event_team_venue']==venue)&(df['event_type']=='goal')).cumsum()
|
984
|
+
df[f'{venue}_corsi'] = ((df['event_team_venue']==venue)&(df['event_type'].isin(['blocked-shot','missed-shot','shot-on-goal','goal']))).cumsum()
|
985
|
+
df[f'{venue}_fenwick'] = ((df['event_team_venue']==venue)&(df['event_type'].isin(['missed-shot','shot-on-goal','goal']))).cumsum()
|
986
|
+
df[f'{venue}_penalties'] = ((df['event_team_venue']==venue)&(df['event_type']=='penalty')).cumsum()
|
987
|
+
|
1097
988
|
#Forward fill as necessary
|
1098
|
-
cols = ['period_type','home_team_defending_side','
|
989
|
+
cols = ['period_type','home_team_defending_side','away_coach','home_coach']
|
1099
990
|
for col in cols:
|
1100
991
|
try: df[col]
|
1101
992
|
except: df[col] = ""
|
1102
993
|
df[col] = df[col].ffill()
|
1103
|
-
|
994
|
+
|
1104
995
|
#Return: complete play-by-play with all important data for each event in a provided game
|
1105
996
|
return df[[col for col in get_col() if col in df.columns.to_list()]].replace(r'^\s*$', np.nan, regex=True)
|