wsba-hockey 1.0.2__py3-none-any.whl → 1.0.4__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- wsba_hockey/data_pipelines.py +183 -0
- wsba_hockey/evidence/weakside-breakout/node_modules/duckdb/vendor.py +146 -0
- wsba_hockey/evidence/weakside-breakout/node_modules/flatted/python/flatted.py +149 -0
- wsba_hockey/evidence/weakside-breakout/node_modules/flatted/python/test.py +63 -0
- wsba_hockey/evidence/weakside-breakout/node_modules/node-gyp/gyp/gyp_main.py +45 -0
- wsba_hockey/evidence/weakside-breakout/node_modules/node-gyp/gyp/pylib/gyp/MSVSNew.py +367 -0
- wsba_hockey/evidence/weakside-breakout/node_modules/node-gyp/gyp/pylib/gyp/MSVSProject.py +206 -0
- wsba_hockey/evidence/weakside-breakout/node_modules/node-gyp/gyp/pylib/gyp/MSVSSettings.py +1270 -0
- wsba_hockey/evidence/weakside-breakout/node_modules/node-gyp/gyp/pylib/gyp/MSVSSettings_test.py +1547 -0
- wsba_hockey/evidence/weakside-breakout/node_modules/node-gyp/gyp/pylib/gyp/MSVSToolFile.py +59 -0
- wsba_hockey/evidence/weakside-breakout/node_modules/node-gyp/gyp/pylib/gyp/MSVSUserFile.py +153 -0
- wsba_hockey/evidence/weakside-breakout/node_modules/node-gyp/gyp/pylib/gyp/MSVSUtil.py +271 -0
- wsba_hockey/evidence/weakside-breakout/node_modules/node-gyp/gyp/pylib/gyp/MSVSVersion.py +574 -0
- wsba_hockey/evidence/weakside-breakout/node_modules/node-gyp/gyp/pylib/gyp/__init__.py +690 -0
- wsba_hockey/evidence/weakside-breakout/node_modules/node-gyp/gyp/pylib/gyp/common.py +661 -0
- wsba_hockey/evidence/weakside-breakout/node_modules/node-gyp/gyp/pylib/gyp/common_test.py +78 -0
- wsba_hockey/evidence/weakside-breakout/node_modules/node-gyp/gyp/pylib/gyp/easy_xml.py +165 -0
- wsba_hockey/evidence/weakside-breakout/node_modules/node-gyp/gyp/pylib/gyp/easy_xml_test.py +109 -0
- wsba_hockey/evidence/weakside-breakout/node_modules/node-gyp/gyp/pylib/gyp/flock_tool.py +55 -0
- wsba_hockey/evidence/weakside-breakout/node_modules/node-gyp/gyp/pylib/gyp/generator/__init__.py +0 -0
- wsba_hockey/evidence/weakside-breakout/node_modules/node-gyp/gyp/pylib/gyp/generator/analyzer.py +808 -0
- wsba_hockey/evidence/weakside-breakout/node_modules/node-gyp/gyp/pylib/gyp/generator/android.py +1173 -0
- wsba_hockey/evidence/weakside-breakout/node_modules/node-gyp/gyp/pylib/gyp/generator/cmake.py +1321 -0
- wsba_hockey/evidence/weakside-breakout/node_modules/node-gyp/gyp/pylib/gyp/generator/compile_commands_json.py +120 -0
- wsba_hockey/evidence/weakside-breakout/node_modules/node-gyp/gyp/pylib/gyp/generator/dump_dependency_json.py +103 -0
- wsba_hockey/evidence/weakside-breakout/node_modules/node-gyp/gyp/pylib/gyp/generator/eclipse.py +464 -0
- wsba_hockey/evidence/weakside-breakout/node_modules/node-gyp/gyp/pylib/gyp/generator/gypd.py +89 -0
- wsba_hockey/evidence/weakside-breakout/node_modules/node-gyp/gyp/pylib/gyp/generator/gypsh.py +58 -0
- wsba_hockey/evidence/weakside-breakout/node_modules/node-gyp/gyp/pylib/gyp/generator/make.py +2714 -0
- wsba_hockey/evidence/weakside-breakout/node_modules/node-gyp/gyp/pylib/gyp/generator/msvs.py +3981 -0
- wsba_hockey/evidence/weakside-breakout/node_modules/node-gyp/gyp/pylib/gyp/generator/msvs_test.py +44 -0
- wsba_hockey/evidence/weakside-breakout/node_modules/node-gyp/gyp/pylib/gyp/generator/ninja.py +2936 -0
- wsba_hockey/evidence/weakside-breakout/node_modules/node-gyp/gyp/pylib/gyp/generator/ninja_test.py +55 -0
- wsba_hockey/evidence/weakside-breakout/node_modules/node-gyp/gyp/pylib/gyp/generator/xcode.py +1394 -0
- wsba_hockey/evidence/weakside-breakout/node_modules/node-gyp/gyp/pylib/gyp/generator/xcode_test.py +25 -0
- wsba_hockey/evidence/weakside-breakout/node_modules/node-gyp/gyp/pylib/gyp/input.py +3130 -0
- wsba_hockey/evidence/weakside-breakout/node_modules/node-gyp/gyp/pylib/gyp/input_test.py +98 -0
- wsba_hockey/evidence/weakside-breakout/node_modules/node-gyp/gyp/pylib/gyp/mac_tool.py +771 -0
- wsba_hockey/evidence/weakside-breakout/node_modules/node-gyp/gyp/pylib/gyp/msvs_emulation.py +1271 -0
- wsba_hockey/evidence/weakside-breakout/node_modules/node-gyp/gyp/pylib/gyp/ninja_syntax.py +174 -0
- wsba_hockey/evidence/weakside-breakout/node_modules/node-gyp/gyp/pylib/gyp/simple_copy.py +61 -0
- wsba_hockey/evidence/weakside-breakout/node_modules/node-gyp/gyp/pylib/gyp/win_tool.py +374 -0
- wsba_hockey/evidence/weakside-breakout/node_modules/node-gyp/gyp/pylib/gyp/xcode_emulation.py +1939 -0
- wsba_hockey/evidence/weakside-breakout/node_modules/node-gyp/gyp/pylib/gyp/xcode_ninja.py +302 -0
- wsba_hockey/evidence/weakside-breakout/node_modules/node-gyp/gyp/pylib/gyp/xcodeproj_file.py +3197 -0
- wsba_hockey/evidence/weakside-breakout/node_modules/node-gyp/gyp/pylib/gyp/xml_fix.py +65 -0
- wsba_hockey/evidence/weakside-breakout/node_modules/node-gyp/gyp/test_gyp.py +261 -0
- wsba_hockey/evidence/weakside-breakout/node_modules/node-gyp/gyp/tools/graphviz.py +102 -0
- wsba_hockey/evidence/weakside-breakout/node_modules/node-gyp/gyp/tools/pretty_gyp.py +156 -0
- wsba_hockey/evidence/weakside-breakout/node_modules/node-gyp/gyp/tools/pretty_sln.py +181 -0
- wsba_hockey/evidence/weakside-breakout/node_modules/node-gyp/gyp/tools/pretty_vcproj.py +339 -0
- wsba_hockey/evidence/weakside-breakout/node_modules/node-gyp/test/fixtures/test-charmap.py +31 -0
- wsba_hockey/evidence/weakside-breakout/node_modules/node-gyp/update-gyp.py +64 -0
- wsba_hockey/evidence/weakside-breakout/node_modules/sqlite3/node_modules/node-gyp/gyp/gyp_main.py +45 -0
- wsba_hockey/evidence/weakside-breakout/node_modules/sqlite3/node_modules/node-gyp/gyp/pylib/gyp/MSVSNew.py +367 -0
- wsba_hockey/evidence/weakside-breakout/node_modules/sqlite3/node_modules/node-gyp/gyp/pylib/gyp/MSVSProject.py +206 -0
- wsba_hockey/evidence/weakside-breakout/node_modules/sqlite3/node_modules/node-gyp/gyp/pylib/gyp/MSVSSettings.py +1270 -0
- wsba_hockey/evidence/weakside-breakout/node_modules/sqlite3/node_modules/node-gyp/gyp/pylib/gyp/MSVSSettings_test.py +1547 -0
- wsba_hockey/evidence/weakside-breakout/node_modules/sqlite3/node_modules/node-gyp/gyp/pylib/gyp/MSVSToolFile.py +59 -0
- wsba_hockey/evidence/weakside-breakout/node_modules/sqlite3/node_modules/node-gyp/gyp/pylib/gyp/MSVSUserFile.py +153 -0
- wsba_hockey/evidence/weakside-breakout/node_modules/sqlite3/node_modules/node-gyp/gyp/pylib/gyp/MSVSUtil.py +271 -0
- wsba_hockey/evidence/weakside-breakout/node_modules/sqlite3/node_modules/node-gyp/gyp/pylib/gyp/MSVSVersion.py +574 -0
- wsba_hockey/evidence/weakside-breakout/node_modules/sqlite3/node_modules/node-gyp/gyp/pylib/gyp/__init__.py +666 -0
- wsba_hockey/evidence/weakside-breakout/node_modules/sqlite3/node_modules/node-gyp/gyp/pylib/gyp/common.py +654 -0
- wsba_hockey/evidence/weakside-breakout/node_modules/sqlite3/node_modules/node-gyp/gyp/pylib/gyp/common_test.py +78 -0
- wsba_hockey/evidence/weakside-breakout/node_modules/sqlite3/node_modules/node-gyp/gyp/pylib/gyp/easy_xml.py +165 -0
- wsba_hockey/evidence/weakside-breakout/node_modules/sqlite3/node_modules/node-gyp/gyp/pylib/gyp/easy_xml_test.py +109 -0
- wsba_hockey/evidence/weakside-breakout/node_modules/sqlite3/node_modules/node-gyp/gyp/pylib/gyp/flock_tool.py +55 -0
- wsba_hockey/evidence/weakside-breakout/node_modules/sqlite3/node_modules/node-gyp/gyp/pylib/gyp/generator/__init__.py +0 -0
- wsba_hockey/evidence/weakside-breakout/node_modules/sqlite3/node_modules/node-gyp/gyp/pylib/gyp/generator/analyzer.py +808 -0
- wsba_hockey/evidence/weakside-breakout/node_modules/sqlite3/node_modules/node-gyp/gyp/pylib/gyp/generator/android.py +1173 -0
- wsba_hockey/evidence/weakside-breakout/node_modules/sqlite3/node_modules/node-gyp/gyp/pylib/gyp/generator/cmake.py +1321 -0
- wsba_hockey/evidence/weakside-breakout/node_modules/sqlite3/node_modules/node-gyp/gyp/pylib/gyp/generator/compile_commands_json.py +120 -0
- wsba_hockey/evidence/weakside-breakout/node_modules/sqlite3/node_modules/node-gyp/gyp/pylib/gyp/generator/dump_dependency_json.py +103 -0
- wsba_hockey/evidence/weakside-breakout/node_modules/sqlite3/node_modules/node-gyp/gyp/pylib/gyp/generator/eclipse.py +464 -0
- wsba_hockey/evidence/weakside-breakout/node_modules/sqlite3/node_modules/node-gyp/gyp/pylib/gyp/generator/gypd.py +89 -0
- wsba_hockey/evidence/weakside-breakout/node_modules/sqlite3/node_modules/node-gyp/gyp/pylib/gyp/generator/gypsh.py +58 -0
- wsba_hockey/evidence/weakside-breakout/node_modules/sqlite3/node_modules/node-gyp/gyp/pylib/gyp/generator/make.py +2518 -0
- wsba_hockey/evidence/weakside-breakout/node_modules/sqlite3/node_modules/node-gyp/gyp/pylib/gyp/generator/msvs.py +3978 -0
- wsba_hockey/evidence/weakside-breakout/node_modules/sqlite3/node_modules/node-gyp/gyp/pylib/gyp/generator/msvs_test.py +44 -0
- wsba_hockey/evidence/weakside-breakout/node_modules/sqlite3/node_modules/node-gyp/gyp/pylib/gyp/generator/ninja.py +2936 -0
- wsba_hockey/evidence/weakside-breakout/node_modules/sqlite3/node_modules/node-gyp/gyp/pylib/gyp/generator/ninja_test.py +55 -0
- wsba_hockey/evidence/weakside-breakout/node_modules/sqlite3/node_modules/node-gyp/gyp/pylib/gyp/generator/xcode.py +1394 -0
- wsba_hockey/evidence/weakside-breakout/node_modules/sqlite3/node_modules/node-gyp/gyp/pylib/gyp/generator/xcode_test.py +25 -0
- wsba_hockey/evidence/weakside-breakout/node_modules/sqlite3/node_modules/node-gyp/gyp/pylib/gyp/input.py +3137 -0
- wsba_hockey/evidence/weakside-breakout/node_modules/sqlite3/node_modules/node-gyp/gyp/pylib/gyp/input_test.py +98 -0
- wsba_hockey/evidence/weakside-breakout/node_modules/sqlite3/node_modules/node-gyp/gyp/pylib/gyp/mac_tool.py +771 -0
- wsba_hockey/evidence/weakside-breakout/node_modules/sqlite3/node_modules/node-gyp/gyp/pylib/gyp/msvs_emulation.py +1271 -0
- wsba_hockey/evidence/weakside-breakout/node_modules/sqlite3/node_modules/node-gyp/gyp/pylib/gyp/ninja_syntax.py +174 -0
- wsba_hockey/evidence/weakside-breakout/node_modules/sqlite3/node_modules/node-gyp/gyp/pylib/gyp/simple_copy.py +61 -0
- wsba_hockey/evidence/weakside-breakout/node_modules/sqlite3/node_modules/node-gyp/gyp/pylib/gyp/win_tool.py +374 -0
- wsba_hockey/evidence/weakside-breakout/node_modules/sqlite3/node_modules/node-gyp/gyp/pylib/gyp/xcode_emulation.py +1939 -0
- wsba_hockey/evidence/weakside-breakout/node_modules/sqlite3/node_modules/node-gyp/gyp/pylib/gyp/xcode_ninja.py +302 -0
- wsba_hockey/evidence/weakside-breakout/node_modules/sqlite3/node_modules/node-gyp/gyp/pylib/gyp/xcodeproj_file.py +3197 -0
- wsba_hockey/evidence/weakside-breakout/node_modules/sqlite3/node_modules/node-gyp/gyp/pylib/gyp/xml_fix.py +65 -0
- wsba_hockey/evidence/weakside-breakout/node_modules/sqlite3/node_modules/node-gyp/gyp/setup.py +42 -0
- wsba_hockey/evidence/weakside-breakout/node_modules/sqlite3/node_modules/node-gyp/gyp/test_gyp.py +260 -0
- wsba_hockey/evidence/weakside-breakout/node_modules/sqlite3/node_modules/node-gyp/gyp/tools/graphviz.py +102 -0
- wsba_hockey/evidence/weakside-breakout/node_modules/sqlite3/node_modules/node-gyp/gyp/tools/pretty_gyp.py +156 -0
- wsba_hockey/evidence/weakside-breakout/node_modules/sqlite3/node_modules/node-gyp/gyp/tools/pretty_sln.py +181 -0
- wsba_hockey/evidence/weakside-breakout/node_modules/sqlite3/node_modules/node-gyp/gyp/tools/pretty_vcproj.py +339 -0
- wsba_hockey/evidence/weakside-breakout/node_modules/sqlite3/node_modules/node-gyp/test/fixtures/test-charmap.py +31 -0
- wsba_hockey/evidence/weakside-breakout/node_modules/sqlite3/node_modules/node-gyp/update-gyp.py +46 -0
- wsba_hockey/evidence/weakside-breakout/wsba_nhl_apps/wsba_nhl_apps/game_stats/app.py +401 -0
- wsba_hockey/evidence/weakside-breakout/wsba_nhl_apps/wsba_nhl_apps/game_stats/name_fix.py +47 -0
- wsba_hockey/evidence/weakside-breakout/wsba_nhl_apps/wsba_nhl_apps/heatmaps/app.py +108 -0
- wsba_hockey/evidence/weakside-breakout/wsba_nhl_apps/wsba_nhl_apps/heatmaps/plot.py +93 -0
- wsba_hockey/evidence/weakside-breakout/wsba_nhl_apps/wsba_nhl_apps/heatmaps/rink_plot.py +245 -0
- wsba_hockey/evidence/weakside-breakout/wsba_nhl_apps/wsba_nhl_apps/matchups/app.py +145 -0
- wsba_hockey/evidence/weakside-breakout/wsba_nhl_apps/wsba_nhl_apps/matchups/plot.py +77 -0
- wsba_hockey/evidence/weakside-breakout/wsba_nhl_apps/wsba_nhl_apps/matchups/rink_plot.py +245 -0
- wsba_hockey/evidence/weakside-breakout/wsba_nhl_apps/wsba_nhl_apps/pbp/app.py +389 -0
- wsba_hockey/evidence/weakside-breakout/wsba_nhl_apps/wsba_nhl_apps/pbp/plot.py +70 -0
- wsba_hockey/evidence/weakside-breakout/wsba_nhl_apps/wsba_nhl_apps/pbp/rink_plot.py +245 -0
- wsba_hockey/evidence/weakside-breakout/wsba_nhl_apps/wsba_nhl_apps/skater/app.py +110 -0
- wsba_hockey/evidence/weakside-breakout/wsba_nhl_apps/wsba_nhl_apps/skater/plot.py +58 -0
- wsba_hockey/evidence/weakside-breakout/wsba_nhl_apps/wsba_nhl_apps/skater/rink_plot.py +245 -0
- wsba_hockey/tools/agg.py +242 -53
- wsba_hockey/tools/plotting.py +15 -20
- wsba_hockey/tools/scraping.py +149 -258
- wsba_hockey/tools/xg_model.py +370 -298
- wsba_hockey/workspace.py +22 -101
- wsba_hockey/wsba_main.py +494 -147
- {wsba_hockey-1.0.2.dist-info → wsba_hockey-1.0.4.dist-info}/METADATA +2 -2
- wsba_hockey-1.0.4.dist-info/RECORD +135 -0
- {wsba_hockey-1.0.2.dist-info → wsba_hockey-1.0.4.dist-info}/WHEEL +1 -1
- wsba_hockey/stats/calculate_viz/shot_impact.py +0 -2
- wsba_hockey-1.0.2.dist-info/RECORD +0 -19
- {wsba_hockey-1.0.2.dist-info → wsba_hockey-1.0.4.dist-info}/licenses/LICENSE +0 -0
- {wsba_hockey-1.0.2.dist-info → wsba_hockey-1.0.4.dist-info}/top_level.txt +0 -0
wsba_hockey/tools/xg_model.py
CHANGED
@@ -3,48 +3,149 @@ import numpy as np
|
|
3
3
|
import xgboost as xgb
|
4
4
|
import scipy.sparse as sp
|
5
5
|
import joblib
|
6
|
-
|
7
|
-
import
|
6
|
+
import wsba_main as wsba
|
7
|
+
import tools.scraping as scraping
|
8
|
+
from sklearn.calibration import calibration_curve
|
9
|
+
from sklearn.metrics import roc_curve, auc
|
10
|
+
import matplotlib.pyplot as plt
|
8
11
|
|
9
12
|
### XG_MODEL FUNCTIONS ###
|
10
13
|
# Provided in this file are functions vital to the goal prediction model in the WSBA Hockey Python package. #
|
11
14
|
|
12
15
|
## GLOBAL VARIABLES ##
|
13
|
-
#Newest season
|
14
|
-
new_full = '20242025'
|
15
|
-
new = '2024'
|
16
16
|
|
17
|
-
|
17
|
+
target = "is_goal"
|
18
|
+
continuous = ['event_distance',
|
19
|
+
'event_angle',
|
20
|
+
'seconds_elapsed',
|
21
|
+
'period',
|
22
|
+
'x_adj',
|
23
|
+
'y_adj',
|
24
|
+
'distance_from_last',
|
25
|
+
'angle_from_last',
|
26
|
+
'seconds_since_last',
|
27
|
+
'speed_from_last',
|
28
|
+
'speed_of_angle_from_last',
|
29
|
+
'score_state',
|
30
|
+
'strength_diff'
|
31
|
+
]
|
32
|
+
boolean = ['is_home',
|
33
|
+
'wrist',
|
34
|
+
'deflected',
|
35
|
+
'tip-in',
|
36
|
+
'slap',
|
37
|
+
'backhand',
|
38
|
+
'snap',
|
39
|
+
'wrap-around',
|
40
|
+
'poke',
|
41
|
+
'bat',
|
42
|
+
'cradle',
|
43
|
+
'between-legs',
|
44
|
+
'prior_shot-on-goal_same',
|
45
|
+
'prior_missed-shot_same',
|
46
|
+
'prior_blocked-shot_same',
|
47
|
+
'prior_giveaway_same',
|
48
|
+
'prior_takeaway_same',
|
49
|
+
'prior_hit_same',
|
50
|
+
'prior_shot-on-goal_opp',
|
51
|
+
'prior_missed-shot_opp',
|
52
|
+
'prior_blocked-shot_opp',
|
53
|
+
'prior_giveaway_opp',
|
54
|
+
'prior_takeaway_opp',
|
55
|
+
'prior_hit_opp',
|
56
|
+
'prior_faceoff',
|
57
|
+
'regular',
|
58
|
+
'empty_net',
|
59
|
+
'offwing',
|
60
|
+
'rush',
|
61
|
+
'rebound'
|
62
|
+
]
|
63
|
+
|
64
|
+
events = ['faceoff','hit','giveaway','takeaway','blocked-shot','missed-shot','shot-on-goal','goal']
|
65
|
+
shot_types = ['wrist','deflected','tip-in','slap','backhand','snap','wrap-around','poke','bat','cradle','between-legs']
|
66
|
+
fenwick_events = ['missed-shot','shot-on-goal','goal']
|
67
|
+
strengths = ['3v3',
|
68
|
+
'3v4',
|
69
|
+
'3v5',
|
70
|
+
'4v3',
|
71
|
+
'4v4',
|
72
|
+
'4v5',
|
73
|
+
'4v6',
|
74
|
+
'5v3',
|
75
|
+
'5v4',
|
76
|
+
'5v5',
|
77
|
+
'5v6',
|
78
|
+
'6v4',
|
79
|
+
'6v5']
|
80
|
+
|
81
|
+
def fix_players(pbp):
|
82
|
+
#Add/fix player info for shooters and goaltenders
|
83
|
+
print('Adding player info to pbp...')
|
84
|
+
|
85
|
+
#Load roster and all players
|
86
|
+
roster = pd.read_csv('rosters/nhl_rosters.csv').drop_duplicates(['id'])[['fullName','id','shootsCatches']]
|
87
|
+
|
88
|
+
#Some players are missing from the roster file (generally in newer seasons); add these manually
|
89
|
+
miss = list(pbp.loc[~(pbp['event_player_1_id'].isin(list(roster['id'])))&(pbp['event_player_1_id'].notna()),'event_player_1_id'].drop_duplicates())
|
90
|
+
if miss:
|
91
|
+
add = wsba.nhl_scrape_player_data(miss).rename(columns={'playerId':'id'})[['fullName','id','shootsCatches']]
|
92
|
+
roster = pd.concat([roster,add]).reset_index(drop=True)
|
93
|
+
|
94
|
+
#Conversion dict
|
95
|
+
roster['id'] = roster['id'].astype(str)
|
96
|
+
roster_dict = roster.set_index('id').to_dict()['shootsCatches']
|
97
|
+
names_dict = roster.set_index('id').to_dict()['fullName']
|
98
|
+
|
99
|
+
#Add player names
|
100
|
+
for i in range(3):
|
101
|
+
pbp[f'add_player_{i+1}_name'] = np.where(pbp[f'event_player_{i+1}_name'].isna(),pbp[f'event_player_{i+1}_id'].astype(str).replace(names_dict),np.nan)
|
102
|
+
pbp[f'event_player_{i+1}_name'] = pbp[f'event_player_{i+1}_name'].combine_first(pbp[f'add_player_{i+1}_name'])
|
103
|
+
|
104
|
+
pbp['event_goalie_name'] = pbp['event_goalie_id'].astype(str).replace(names_dict)
|
105
|
+
|
106
|
+
#Add hands
|
107
|
+
pbp['event_player_1_hand'] = pbp['event_player_1_id'].astype(str).str.replace('.0','').replace(roster_dict)
|
108
|
+
pbp['event_player_1_hand'] = pbp['event_player_1_hand'].replace('nan',np.nan)
|
109
|
+
|
110
|
+
return pbp
|
111
|
+
|
112
|
+
def prep_xG_data(data):
|
18
113
|
#Prep data for xG training and calculation
|
19
114
|
|
20
|
-
events = ['faceoff','hit','giveaway','takeaway','blocked-shot','missed-shot','shot-on-goal','goal']
|
21
|
-
shot_types = ['wrist','deflected','tip-in','slap','backhand','snap','wrap-around','poke','bat','cradle','between-legs']
|
22
|
-
fenwick_events = ['missed-shot','shot-on-goal','goal']
|
23
|
-
|
24
115
|
#Informal groupby
|
25
|
-
data =
|
116
|
+
data = data.sort_values(by=['season','game_id','period','seconds_elapsed','event_num'])
|
26
117
|
|
27
|
-
#
|
28
|
-
data[
|
29
|
-
|
118
|
+
#Recalibrate times series data with current data
|
119
|
+
data['seconds_since_last'] = data['seconds_elapsed'] - data['seconds_elapsed'].shift(1)
|
120
|
+
#Prevent leaking between games by setting value to zero when no time has occured in game
|
121
|
+
data["seconds_since_last"] = np.where(data['seconds_elapsed']==0,0,data['seconds_since_last'])
|
30
122
|
|
31
123
|
#Create last event columns
|
32
124
|
data["event_team_last"] = data['event_team_abbr'].shift(1)
|
33
125
|
data["event_type_last"] = data['event_type'].shift(1)
|
34
|
-
data["
|
35
|
-
data["
|
36
|
-
data["zone_code_last"] = data['zone_code'].shift(1)
|
126
|
+
data["x_adj_last"] = data['x_adj'].shift(1)
|
127
|
+
data["y_adj_last"] = data['y_adj'].shift(1)
|
128
|
+
data["zone_code_last"] = data['zone_code'].shift(1)
|
37
129
|
|
38
130
|
data.sort_values(['season','game_id','period','seconds_elapsed','event_num'],inplace=True)
|
131
|
+
|
132
|
+
#Contextual Data (for score state minimize the capture to four goals)
|
39
133
|
data['score_state'] = np.where(data['away_team_abbr']==data['event_team_abbr'],data['away_score']-data['home_score'],data['home_score']-data['away_score'])
|
134
|
+
data['score_state'] = np.where(data['score_state']>4,4,data['score_state'])
|
135
|
+
data['score_state'] = np.where(data['score_state']<-4,-4,data['score_state'])
|
136
|
+
|
40
137
|
data['strength_diff'] = np.where(data['away_team_abbr']==data['event_team_abbr'],data['away_skaters']-data['home_skaters'],data['home_skaters']-data['away_skaters'])
|
41
138
|
data['strength_state_venue'] = data['away_skaters'].astype(str)+'v'+data['home_skaters'].astype(str)
|
42
|
-
data['
|
43
|
-
data['
|
139
|
+
data['distance_from_last'] = np.sqrt((data['x_adj'] - data['x_adj_last'])**2 + (data['y_adj'] - data['y_adj_last'])**2)
|
140
|
+
data['angle_from_last'] = np.degrees(np.arctan2(abs(data['y_adj'] - data['y_adj_last']), abs(89 - (data['x_adj']-data['x_adj_last']))))
|
44
141
|
|
45
|
-
#
|
46
|
-
data['
|
47
|
-
data['
|
142
|
+
#Event speeds
|
143
|
+
data['speed_from_last'] = np.where(data['seconds_since_last']==0,0,data['distance_from_last']/data['seconds_since_last'])
|
144
|
+
data['speed_of_angle_from_last'] = np.where(data['seconds_since_last']==0,0,data['angle_from_last']/data['seconds_since_last'])
|
145
|
+
|
146
|
+
#Rush and rebounds are labelled
|
147
|
+
data['rush'] = np.where((data['event_type'].isin(fenwick_events))&(data['zone_code_last'].isin(['N','D']))&(data['x_adj']>25)&(data['seconds_since_last']<=5),1,0)
|
148
|
+
data['rebound'] = np.where((data['event_type'].isin(fenwick_events))&(data['event_type_last'].isin(fenwick_events))&(data['seconds_since_last']<=2),1,0)
|
48
149
|
|
49
150
|
#Create boolean variables
|
50
151
|
data["is_goal"]=(data['event_type']=='goal').astype(int)
|
@@ -59,317 +160,288 @@ def prep_xG_data(pbp):
|
|
59
160
|
|
60
161
|
data['prior_faceoff'] = (data['event_type_last']=='faceoff').astype(int)
|
61
162
|
|
163
|
+
#Misc variables
|
164
|
+
data['empty_net'] = np.where((data['event_type'].isin(fenwick_events))&(data['event_goalie_id'].isna()),1,0)
|
165
|
+
data['regular'] = (data['season_type']==2).astype(int)
|
166
|
+
data['offwing'] = np.where(((data['y_adj']<0)&(data['event_player_1_hand']=='L'))|((data['y_adj']>=0)&(data['event_player_1_hand']=='R')),1,0)
|
167
|
+
|
62
168
|
#Return: pbp data prepared to train and calculate the xG model
|
63
169
|
return data
|
64
170
|
|
65
|
-
def wsba_xG(pbp,
|
171
|
+
def wsba_xG(pbp, hypertune = False, train = False, model_path = "tools/xg_model/wsba_xg.joblib", train_runs = 20, cv_runs = 20):
|
66
172
|
#Train and calculate the WSBA Expected Goals model
|
67
|
-
|
68
|
-
|
69
|
-
|
70
|
-
|
71
|
-
|
72
|
-
|
73
|
-
|
74
|
-
|
75
|
-
|
76
|
-
|
77
|
-
|
78
|
-
|
79
|
-
|
80
|
-
'strength_diff',
|
81
|
-
'fenwick_state',
|
82
|
-
'rush_mod',
|
83
|
-
'rebound_mod']
|
84
|
-
boolean = ['is_home',
|
85
|
-
'wrist',
|
86
|
-
'deflected',
|
87
|
-
'tip-in',
|
88
|
-
'slap',
|
89
|
-
'backhand',
|
90
|
-
'snap',
|
91
|
-
'wrap-around',
|
92
|
-
'poke',
|
93
|
-
'bat',
|
94
|
-
'cradle',
|
95
|
-
'between-legs',
|
96
|
-
'prior_shot-on-goal_same',
|
97
|
-
'prior_missed-shot_same',
|
98
|
-
'prior_blocked-shot_same',
|
99
|
-
'prior_giveaway_same',
|
100
|
-
'prior_takeaway_same',
|
101
|
-
'prior_hit_same',
|
102
|
-
'prior_shot-on-goal_opp',
|
103
|
-
'prior_missed-shot_opp',
|
104
|
-
'prior_blocked-shot_opp',
|
105
|
-
'prior_giveaway_opp',
|
106
|
-
'prior_takeaway_opp',
|
107
|
-
'prior_hit_opp',
|
108
|
-
'prior_faceoff']
|
109
|
-
|
110
|
-
#Prep Data
|
111
|
-
pbp = prep_xG_data(pbp)
|
112
|
-
#Filter unwanted date:
|
113
|
-
#Shots must occur in specified events and strength states, occur before the shootout, and have valid coordinates
|
114
|
-
events = ['faceoff','hit','giveaway','takeaway','blocked-shot','missed-shot','shot-on-goal','goal']
|
115
|
-
fenwick_events = ['missed-shot','shot-on-goal','goal']
|
116
|
-
strengths = ['3v3',
|
117
|
-
'3v4',
|
118
|
-
'3v5',
|
119
|
-
'4v3',
|
120
|
-
'4v4',
|
121
|
-
'4v5',
|
122
|
-
'4v6',
|
123
|
-
'5v3',
|
124
|
-
'5v4',
|
125
|
-
'5v5',
|
126
|
-
'5v6',
|
127
|
-
'6v4',
|
128
|
-
'6v5']
|
129
|
-
|
130
|
-
data = pbp.loc[(pbp['event_type'].isin(events))&
|
173
|
+
|
174
|
+
#Add index for future merging
|
175
|
+
pbp['event_index'] = pbp.index
|
176
|
+
|
177
|
+
#Recalibrate coordinates
|
178
|
+
pbp = scraping.adjust_coords(pbp)
|
179
|
+
|
180
|
+
#Fix strengths
|
181
|
+
pbp['strength_state'] = np.where((pbp['season_type']==3)&(pbp['period']>4),(np.where(pbp['event_team_abbr']==pbp['away_team_abbr'],pbp['away_skaters'].astype(str)+"v"+pbp['home_skaters'].astype(str),pbp['home_skaters'].astype(str)+"v"+pbp['away_skaters'].astype(str))),pbp['strength_state'])
|
182
|
+
|
183
|
+
#Filter unwanted data:
|
184
|
+
#Shots must occur in specified events and strength states, occur in open play, and have valid coordinates
|
185
|
+
pbp_prep = pbp.loc[(pbp['event_type'].isin(events))&
|
131
186
|
(pbp['strength_state'].isin(strengths))&
|
132
|
-
(pbp['
|
133
|
-
(pbp['
|
134
|
-
|
135
|
-
|
187
|
+
(pbp['x'].notna())&
|
188
|
+
(pbp['y'].notna())]
|
189
|
+
|
190
|
+
#Prep Data
|
191
|
+
data = prep_xG_data(pbp_prep)
|
136
192
|
|
193
|
+
#Reduce to fenwick shots
|
194
|
+
data = data.loc[data['event_type'].isin(fenwick_events)]
|
195
|
+
|
137
196
|
#Convert to sparse
|
138
|
-
data_sparse = sp.csr_matrix(data[[target]+
|
197
|
+
data_sparse = sp.csr_matrix(data[[target]+continuous+boolean])
|
139
198
|
|
140
199
|
#Target and Predictors
|
141
200
|
is_goal_vect = data_sparse[:, 0].A
|
142
201
|
predictors = data_sparse[:, 1:]
|
143
202
|
|
144
203
|
#XGB DataModel
|
145
|
-
xgb_matrix = xgb.DMatrix(data=predictors,label=is_goal_vect)
|
146
|
-
|
147
|
-
if train
|
148
|
-
|
149
|
-
|
150
|
-
|
151
|
-
|
152
|
-
|
153
|
-
|
154
|
-
|
155
|
-
|
156
|
-
|
157
|
-
|
158
|
-
|
159
|
-
|
204
|
+
xgb_matrix = xgb.DMatrix(data=predictors,label=is_goal_vect,feature_names=(continuous+boolean))
|
205
|
+
|
206
|
+
if train:
|
207
|
+
if hypertune:
|
208
|
+
# Number of runs
|
209
|
+
run_num = train_runs
|
210
|
+
|
211
|
+
# DataFrames to store results
|
212
|
+
best_df = pd.DataFrame(columns=["max_depth", "eta", "gamma", "subsample", "colsample_bytree", "min_child_weight", "max_delta_step"])
|
213
|
+
best_ll = pd.DataFrame(columns=["ll", "ll_rounds", "auc", "auc_rounds", "seed"])
|
214
|
+
|
215
|
+
# Loop
|
216
|
+
for i in range(run_num):
|
217
|
+
print(f"### LOOP: {i+1} ###")
|
218
|
+
|
219
|
+
param = {
|
220
|
+
"objective": "binary:logistic",
|
221
|
+
"eval_metric": ["logloss", "auc"],
|
222
|
+
"max_depth": 6,
|
223
|
+
"eta": np.random.uniform(0.06, 0.11),
|
224
|
+
"gamma": np.random.uniform(0.06, 0.12),
|
225
|
+
"subsample": np.random.uniform(0.76, 0.84),
|
226
|
+
"colsample_bytree": np.random.uniform(0.76, 0.8),
|
227
|
+
"min_child_weight": np.random.randint(5, 23),
|
228
|
+
"max_delta_step": np.random.randint(4, 9)
|
229
|
+
}
|
230
|
+
|
231
|
+
# Cross-validation
|
232
|
+
seed = np.random.randint(0, 10000)
|
233
|
+
np.random.seed(seed)
|
234
|
+
|
235
|
+
cv_results = xgb.cv(
|
236
|
+
params=param,
|
237
|
+
dtrain=xgb_matrix,
|
238
|
+
num_boost_round=1000,
|
239
|
+
nfold=5,
|
240
|
+
early_stopping_rounds=25,
|
241
|
+
metrics=["logloss", "auc"],
|
242
|
+
seed=seed
|
243
|
+
)
|
244
|
+
|
245
|
+
# Record results
|
246
|
+
best_df.loc[i] = param
|
247
|
+
best_ll.loc[i] = [
|
248
|
+
cv_results["test-logloss-mean"].min(),
|
249
|
+
cv_results["test-logloss-mean"].idxmin(),
|
250
|
+
cv_results["test-auc-mean"].max(),
|
251
|
+
cv_results["test-auc-mean"].idxmax(),
|
252
|
+
seed
|
253
|
+
]
|
254
|
+
|
255
|
+
# Combine results
|
256
|
+
best_all = pd.concat([best_df, best_ll], axis=1).dropna()
|
257
|
+
|
258
|
+
# Arrange to get best run
|
259
|
+
best_all = best_all.sort_values(by="auc", ascending=False)
|
260
|
+
|
261
|
+
best_all.to_csv("tools/xg_model/testing/xg_model_training_runs.csv",index=False)
|
262
|
+
|
263
|
+
# Final parameters
|
264
|
+
param_7_EV = {
|
160
265
|
"objective": "binary:logistic",
|
161
266
|
"eval_metric": ["logloss", "auc"],
|
162
|
-
"
|
163
|
-
"
|
164
|
-
"
|
165
|
-
"
|
166
|
-
"
|
167
|
-
"
|
168
|
-
"max_delta_step": np.random.randint(4, 9)
|
267
|
+
"gamma": best_all['gamma'].iloc[0],
|
268
|
+
"subsample": best_all['subsample'].iloc[0],
|
269
|
+
"max_depth": best_all['max_depth'].iloc[0],
|
270
|
+
"colsample_bytree": best_all['colsample_bytree'].iloc[0],
|
271
|
+
"min_child_weight": best_all['min_child_weight'].iloc[0],
|
272
|
+
"max_delta_step": best_all['max_delta_step'].iloc[0],
|
169
273
|
}
|
170
|
-
|
171
|
-
# Cross-validation
|
172
|
-
seed = np.random.randint(0, 10000)
|
173
|
-
np.random.seed(seed)
|
174
|
-
|
175
|
-
cv_results = xgb.cv(
|
176
|
-
params=param,
|
177
|
-
dtrain=xgb_matrix,
|
178
|
-
num_boost_round=1000,
|
179
|
-
nfold=5,
|
180
|
-
early_stopping_rounds=25,
|
181
|
-
metrics=["logloss", "auc"],
|
182
|
-
seed=seed
|
183
|
-
)
|
184
|
-
|
185
|
-
# Record results
|
186
|
-
best_df.loc[i] = param
|
187
|
-
best_ll.loc[i] = [
|
188
|
-
cv_results["test-logloss-mean"].min(),
|
189
|
-
cv_results["test-logloss-mean"].idxmin(),
|
190
|
-
cv_results["test-auc-mean"].max(),
|
191
|
-
cv_results["test-auc-mean"].idxmax(),
|
192
|
-
seed
|
193
|
-
]
|
194
274
|
|
195
|
-
|
196
|
-
|
197
|
-
|
198
|
-
|
199
|
-
|
200
|
-
|
201
|
-
|
202
|
-
|
203
|
-
|
204
|
-
|
205
|
-
|
206
|
-
|
207
|
-
|
208
|
-
|
209
|
-
|
210
|
-
|
211
|
-
|
212
|
-
|
213
|
-
|
214
|
-
|
215
|
-
|
216
|
-
|
217
|
-
|
218
|
-
|
219
|
-
|
220
|
-
|
221
|
-
|
222
|
-
|
223
|
-
|
224
|
-
|
225
|
-
|
226
|
-
|
227
|
-
|
228
|
-
seed = np.random.randint(0, 10000)
|
229
|
-
np.random.seed(seed)
|
230
|
-
|
231
|
-
cv_rounds = xgb.cv(
|
232
|
-
params=param_7_EV,
|
233
|
-
dtrain=xgb_matrix,
|
234
|
-
num_boost_round=1000,
|
235
|
-
nfold=5,
|
236
|
-
early_stopping_rounds=25,
|
237
|
-
metrics=["logloss", "auc"],
|
238
|
-
seed=seed
|
239
|
-
)
|
240
|
-
|
241
|
-
# Record results
|
242
|
-
cv_test.loc[i] = [
|
243
|
-
cv_rounds["test-auc-mean"].idxmax(),
|
244
|
-
cv_rounds["test-auc-mean"].max(),
|
245
|
-
cv_rounds["test-logloss-mean"].idxmin(),
|
246
|
-
cv_rounds["test-logloss-mean"].min(),
|
247
|
-
seed
|
248
|
-
]
|
249
|
-
|
250
|
-
# Clean results and sort to find the number of rounds to use and seed
|
251
|
-
cv_final = cv_test.sort_values(by="AUC", ascending=False)
|
252
|
-
if overwrite == True:
|
253
|
-
cv_final.to_csv("xg_model/testing/xg_model_cv_runs.csv",index=False)
|
275
|
+
# CV rounds Loop
|
276
|
+
run_num = cv_runs
|
277
|
+
cv_test = pd.DataFrame(columns=["AUC_rounds", "AUC", "LL_rounds", "LL", "seed"])
|
278
|
+
|
279
|
+
for i in range(run_num):
|
280
|
+
print(f"### LOOP: {i+1} ###")
|
281
|
+
|
282
|
+
seed = np.random.randint(0, 10000)
|
283
|
+
np.random.seed(seed)
|
284
|
+
|
285
|
+
cv_rounds = xgb.cv(
|
286
|
+
params=param_7_EV,
|
287
|
+
dtrain=xgb_matrix,
|
288
|
+
num_boost_round=1000,
|
289
|
+
nfold=5,
|
290
|
+
early_stopping_rounds=25,
|
291
|
+
metrics=["logloss", "auc"],
|
292
|
+
seed=seed
|
293
|
+
)
|
294
|
+
|
295
|
+
# Record results
|
296
|
+
cv_test.loc[i] = [
|
297
|
+
cv_rounds["test-auc-mean"].idxmax(),
|
298
|
+
cv_rounds["test-auc-mean"].max(),
|
299
|
+
cv_rounds["test-logloss-mean"].idxmin(),
|
300
|
+
cv_rounds["test-logloss-mean"].min(),
|
301
|
+
seed
|
302
|
+
]
|
303
|
+
|
304
|
+
# Clean results and sort to find the number of rounds to use and seed
|
305
|
+
cv_final = cv_test.sort_values(by="AUC", ascending=False)
|
306
|
+
cv_final.to_csv("tools/xg_model/testing/xg_model_cv_runs.csv",index=False)
|
254
307
|
else:
|
255
|
-
|
256
|
-
|
257
|
-
|
258
|
-
cv_final.loc[len(cv_final)] = cv_test.mean()
|
308
|
+
# Load previous parameters
|
309
|
+
best_all = pd.read_csv('tools/xg_model/testing/xg_model_training_runs.csv')
|
310
|
+
cv_final = pd.read_csv("tools/xg_model/testing/xg_model_cv_runs.csv")
|
259
311
|
|
260
|
-
|
261
|
-
|
262
|
-
|
263
|
-
|
264
|
-
|
265
|
-
|
266
|
-
|
312
|
+
print('Loaded hyperparameters...')
|
313
|
+
# Final parameters
|
314
|
+
param_7_EV = {
|
315
|
+
"objective": "binary:logistic",
|
316
|
+
"eval_metric": ["logloss", "auc"],
|
317
|
+
"gamma": best_all['gamma'].iloc[0],
|
318
|
+
"subsample": best_all['subsample'].iloc[0],
|
319
|
+
"max_depth": best_all['max_depth'].iloc[0],
|
320
|
+
"colsample_bytree": best_all['colsample_bytree'].iloc[0],
|
321
|
+
"min_child_weight": best_all['min_child_weight'].iloc[0],
|
322
|
+
"max_delta_step": best_all['max_delta_step'].iloc[0],
|
323
|
+
}
|
267
324
|
|
325
|
+
print('Training model...')
|
326
|
+
seed = int(cv_final['seed'].iloc[0])
|
327
|
+
np.random.seed(seed)
|
268
328
|
model = xgb.train(
|
269
329
|
params=param_7_EV,
|
270
330
|
dtrain=xgb_matrix,
|
271
|
-
num_boost_round=
|
272
|
-
verbose_eval=2
|
331
|
+
num_boost_round=int(cv_final['AUC_rounds'].iloc[0]),
|
332
|
+
verbose_eval=2,
|
273
333
|
)
|
274
|
-
|
334
|
+
|
335
|
+
#Save model
|
275
336
|
joblib.dump(model,model_path)
|
276
337
|
|
277
338
|
else:
|
278
339
|
model = joblib.load(model_path)
|
279
|
-
pbp['xG'] = np.where(pbp['event_type'].isin(fenwick_events),model.predict(xgb_matrix),"")
|
280
|
-
return pbp
|
281
340
|
|
282
|
-
|
283
|
-
|
341
|
+
#Predict goal
|
342
|
+
data['xG'] = model.predict(xgb_matrix)
|
343
|
+
|
344
|
+
#Drop previous xG if it exists
|
345
|
+
pbp = pbp.drop(columns=['xG'],errors='ignore')
|
346
|
+
|
347
|
+
#Merge
|
348
|
+
comm = list(data.columns.intersection(pbp.columns))
|
349
|
+
comm.remove('event_index')
|
350
|
+
data = data.drop(columns=comm)
|
351
|
+
pbp_xg = pd.merge(pbp,data,how='left')
|
352
|
+
|
353
|
+
return pbp_xg
|
354
|
+
|
355
|
+
def feature_importance(model):
    """Plot and save the feature importance chart for the WSBA xG model.

    Args:
        model: Location of the serialized model; it is handed directly to
            ``joblib.load``.

    Returns:
        None. The chart (at most 30 features, ranked by the ``weight``
        importance type) is written to
        ``tools/xg_model/metrics/feature_importance.png``.
    """
    print('Feature importance for WSBA xG Model...')
    # Keep the loaded booster under its own name instead of rebinding the
    # ``model`` argument, which holds the location it was loaded from.
    booster = joblib.load(model)

    fig, ax = plt.subplots(figsize=(10, 7))
    try:
        xgb.plot_importance(
            booster,
            importance_type='weight',
            max_num_features=30,
            height=0.5,
            grid=False,
            show_values=False,
            xlabel='Weight',
            title='WSBA xG Feature Importance',
            ax=ax,
        )
        fig.savefig('tools/xg_model/metrics/feature_importance.png',bbox_inches='tight')
    finally:
        # The figure is only needed for the saved image; closing it keeps
        # repeated calls from accumulating open matplotlib figures.
        plt.close(fig)
|
284
371
|
|
285
|
-
|
286
|
-
|
287
|
-
db = pd.read_parquet("tools/xg_model/moneypuck/shots/shots_2007-2023.parquet")
|
288
|
-
except:
|
289
|
-
url = 'https://peter-tanner.com/moneypuck/downloads/shots_2007-2023.zip'
|
372
|
+
def roc_auc_curve(pbp,model):
    """Plot and save the ROC curve of the WSBA xG model on the supplied plays.

    Args:
        pbp: Play-by-play DataFrame. The code reads the ``event_type``,
            ``strength_state``, ``period``, ``x`` and ``y`` columns directly;
            any further requirements come from ``scraping.adjust_coords`` and
            ``prep_xG_data`` (not visible here).
        model: Location of the serialized model; it is handed directly to
            ``joblib.load``.

    Returns:
        None. The curve is written to
        ``tools/xg_model/metrics/roc_auc_curve.png``.
    """
    print('ROC-AUC Curve for WSBA xG Model...')

    #Recalibrate coordinates
    pbp = scraping.adjust_coords(pbp)

    #Filter unwanted data:
    #Shots must occur in specified events and strength states, occur in open play, and have valid coordinates
    keep = (
        pbp['event_type'].isin(events)
        & pbp['strength_state'].isin(strengths)
        & (pbp['period'] < 5)
        & pbp['x'].notna()
        & pbp['y'].notna()
    )

    pbp = prep_xG_data(pbp.loc[keep])
    booster = joblib.load(model)

    shots = pbp.loc[pbp['event_type'].isin(fenwick_events)]

    # Build the feature list once so the matrix columns and the names given
    # to xgboost cannot drift apart.
    feature_names = continuous + boolean
    shots_sparse = sp.csr_matrix(shots[[target] + feature_names])

    # Column 0 is the target. ``toarray()`` is the documented way to densify
    # (the ``.A`` shorthand is not available on every SciPy sparse type), and
    # ``ravel()`` yields the 1-D label vector the metric functions expect.
    is_goal_vect = shots_sparse[:, 0].toarray().ravel()
    predictors = shots_sparse[:, 1:]

    xgb_matrix = xgb.DMatrix(data=predictors,label=is_goal_vect,feature_names=feature_names)

    pred = booster.predict(xgb_matrix)
    fpr, tpr, _ = roc_curve(is_goal_vect, pred)
    roc_auc = auc(fpr,tpr)

    fig, ax = plt.subplots()
    try:
        ax.plot(fpr,tpr,label=f"ROC (AUC = {roc_auc:.4f})")
        # Diagonal reference line from (0, 0) to (1, 1).
        ax.plot([0, 1], [0, 1], linestyle="--")
        ax.set_title("WSBA xG ROC Curve")
        ax.set_xlabel("False Positive Rate")
        ax.set_ylabel("True Positive Rate")
        ax.legend(loc="lower right")
        fig.savefig('tools/xg_model/metrics/roc_auc_curve.png')
    finally:
        # The figure is only needed for the saved image; closing it keeps
        # repeated calls from accumulating open matplotlib figures.
        plt.close(fig)
|
410
|
+
|
411
|
+
def reliability(pbp,model):
    """Plot and save the reliability diagram of the WSBA xG model.

    Args:
        pbp: Play-by-play DataFrame. The code reads the ``event_type``,
            ``strength_state``, ``period``, ``x`` and ``y`` columns directly;
            any further requirements come from ``scraping.adjust_coords`` and
            ``prep_xG_data`` (not visible here).
        model: Location of the serialized model; it is handed directly to
            ``joblib.load``.

    Returns:
        None. The diagram is written to
        ``tools/xg_model/metrics/reliability.png``.
    """
    print('Reliability for WSBA xG Model...')

    #Recalibrate coordinates
    pbp = scraping.adjust_coords(pbp)

    #Filter unwanted data:
    #Shots must occur in specified events and strength states, occur in open play, and have valid coordinates
    keep = (
        pbp['event_type'].isin(events)
        & pbp['strength_state'].isin(strengths)
        & (pbp['period'] < 5)
        & pbp['x'].notna()
        & pbp['y'].notna()
    )

    pbp = prep_xG_data(pbp.loc[keep])
    booster = joblib.load(model)

    shots = pbp.loc[pbp['event_type'].isin(fenwick_events)]

    # Build the feature list once so the matrix columns and the names given
    # to xgboost cannot drift apart.
    feature_names = continuous + boolean
    shots_sparse = sp.csr_matrix(shots[[target] + feature_names])

    # Column 0 is the target. ``toarray()`` is the documented way to densify
    # (the ``.A`` shorthand is not available on every SciPy sparse type), and
    # ``ravel()`` yields the 1-D label vector the metric functions expect.
    is_goal_vect = shots_sparse[:, 0].toarray().ravel()
    predictors = shots_sparse[:, 1:]

    xgb_matrix = xgb.DMatrix(data=predictors,label=is_goal_vect,feature_names=feature_names)

    pred = booster.predict(xgb_matrix)
    # calibration_curve returns (fraction of positives, mean predicted value)
    # per bin; the mean prediction goes on the x-axis.
    fop, mpv = calibration_curve(is_goal_vect, pred, strategy='uniform')

    fig, ax = plt.subplots()
    try:
        ax.plot(mpv, fop, "s-", label="Model")
        ax.plot([0, 1], [0, 1], linestyle="--", label="Perfect calibration")
        ax.set_title("WSBA xG Reliability Diagram")
        ax.set_xlabel("Predicted Probability (mean)")
        ax.set_ylabel("Fraction of positives")
        ax.legend(loc="best")
        fig.savefig('tools/xg_model/metrics/reliability.png')
    finally:
        # The figure is only needed for the saved image; closing it keeps
        # repeated calls from accumulating open matplotlib figures.
        plt.close(fig)
|