wsba-hockey 1.0.2__py3-none-any.whl → 1.0.3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- wsba_hockey/tools/plotting.py +3 -3
- wsba_hockey/tools/xg_model.py +41 -15
- wsba_hockey/workspace.py +24 -8
- wsba_hockey/wsba_main.py +24 -5
- {wsba_hockey-1.0.2.dist-info → wsba_hockey-1.0.3.dist-info}/METADATA +2 -2
- {wsba_hockey-1.0.2.dist-info → wsba_hockey-1.0.3.dist-info}/RECORD +9 -9
- {wsba_hockey-1.0.2.dist-info → wsba_hockey-1.0.3.dist-info}/WHEEL +0 -0
- {wsba_hockey-1.0.2.dist-info → wsba_hockey-1.0.3.dist-info}/licenses/LICENSE +0 -0
- {wsba_hockey-1.0.2.dist-info → wsba_hockey-1.0.3.dist-info}/top_level.txt +0 -0
wsba_hockey/tools/plotting.py
CHANGED
@@ -14,9 +14,9 @@ event_markers = {
|
|
14
14
|
'faceoff':'X',
|
15
15
|
'hit':'P',
|
16
16
|
'blocked-shot':'v',
|
17
|
-
'missed-shot':'
|
17
|
+
'missed-shot':'o',
|
18
18
|
'shot-on-goal':'D',
|
19
|
-
'goal':'
|
19
|
+
'goal':'*',
|
20
20
|
'giveaway':'1',
|
21
21
|
'takeaway':'2',
|
22
22
|
}
|
@@ -135,7 +135,7 @@ def plot_game_events(pbp,game_id,events,strengths,marker_dict=event_markers,team
|
|
135
135
|
|
136
136
|
for event in events:
|
137
137
|
plays = pbp.loc[pbp['event_type']==event]
|
138
|
-
ax.scatter(plays['x_adj'],plays['y_adj'],plays['size'],plays['color'],marker=event_markers[event],label=event,zorder=5)
|
138
|
+
ax.scatter(plays['x_adj'],plays['y_adj'],plays['size'],plays['color'],marker=event_markers[event],edgecolors='white',label=event,zorder=5)
|
139
139
|
|
140
140
|
ax.set_title(f'{away_abbr} @ {home_abbr} - {date}')
|
141
141
|
ax.legend(bbox_to_anchor =(0.5,-0.35), loc='lower center',ncol=1).set_visible(legend)
|
wsba_hockey/tools/xg_model.py
CHANGED
@@ -108,7 +108,7 @@ def wsba_xG(pbp, train = False, overwrite = False, model_path = "tools/xg_model/
|
|
108
108
|
'prior_faceoff']
|
109
109
|
|
110
110
|
#Prep Data
|
111
|
-
|
111
|
+
pbp_prep = prep_xG_data(pbp)
|
112
112
|
#Filter unwanted data:
|
113
113
|
#Shots must occur in specified events and strength states, occur before the shootout, and have valid coordinates
|
114
114
|
events = ['faceoff','hit','giveaway','takeaway','blocked-shot','missed-shot','shot-on-goal','goal']
|
@@ -127,12 +127,12 @@ def wsba_xG(pbp, train = False, overwrite = False, model_path = "tools/xg_model/
|
|
127
127
|
'6v4',
|
128
128
|
'6v5']
|
129
129
|
|
130
|
-
data =
|
131
|
-
(
|
132
|
-
(
|
133
|
-
(
|
134
|
-
(
|
135
|
-
~((
|
130
|
+
data = pbp_prep.loc[(pbp_prep['event_type'].isin(events))&
|
131
|
+
(pbp_prep['strength_state'].isin(strengths))&
|
132
|
+
(pbp_prep['period'] < 5)&
|
133
|
+
(pbp_prep['x_fixed'].notna())&
|
134
|
+
(pbp_prep['y_fixed'].notna())&
|
135
|
+
~((pbp_prep['x_fixed']==0)&(pbp_prep['y_fixed']==0)&(pbp_prep['x_fixed'].isin(fenwick_events))&(pbp_prep['event_distance']!=90))]
|
136
136
|
|
137
137
|
#Convert to sparse
|
138
138
|
data_sparse = sp.csr_matrix(data[[target]+continous+boolean])
|
@@ -199,11 +199,11 @@ def wsba_xG(pbp, train = False, overwrite = False, model_path = "tools/xg_model/
|
|
199
199
|
best_all = best_all.sort_values(by="auc", ascending=False)
|
200
200
|
|
201
201
|
if overwrite == True:
|
202
|
-
best_all.to_csv("xg_model/testing/xg_model_training_runs.csv",index=False)
|
202
|
+
best_all.to_csv("tools/xg_model/testing/xg_model_training_runs.csv",index=False)
|
203
203
|
else:
|
204
|
-
best_old = pd.read_csv("xg_model/testing/xg_model_training_runs.csv")
|
204
|
+
best_old = pd.read_csv("tools/xg_model/testing/xg_model_training_runs.csv")
|
205
205
|
best_comb = pd.concat([best_old,best_all])
|
206
|
-
best_comb.to_csv("xg_model/testing/xg_model_training_runs.csv",index=False)
|
206
|
+
best_comb.to_csv("tools/xg_model/testing/xg_model_training_runs.csv",index=False)
|
207
207
|
|
208
208
|
# Final parameters
|
209
209
|
param_7_EV = {
|
@@ -250,11 +250,11 @@ def wsba_xG(pbp, train = False, overwrite = False, model_path = "tools/xg_model/
|
|
250
250
|
# Clean results and sort to find the number of rounds to use and seed
|
251
251
|
cv_final = cv_test.sort_values(by="AUC", ascending=False)
|
252
252
|
if overwrite == True:
|
253
|
-
cv_final.to_csv("xg_model/testing/xg_model_cv_runs.csv",index=False)
|
253
|
+
cv_final.to_csv("tools/xg_model/testing/xg_model_cv_runs.csv",index=False)
|
254
254
|
else:
|
255
|
-
cv_old = pd.read_csv("xg_model/testing/xg_model_cv_runs.csv")
|
255
|
+
cv_old = pd.read_csv("tools/xg_model/testing/xg_model_cv_runs.csv")
|
256
256
|
cv_comb = pd.concat([cv_old,cv_final])
|
257
|
-
cv_comb.to_csv("xg_model/testing/xg_model_cv_runs.csv")
|
257
|
+
cv_comb.to_csv("tools/xg_model/testing/xg_model_cv_runs.csv")
|
258
258
|
cv_final.loc[len(cv_final)] = cv_test.mean()
|
259
259
|
|
260
260
|
# Train the final model
|
@@ -276,8 +276,34 @@ def wsba_xG(pbp, train = False, overwrite = False, model_path = "tools/xg_model/
|
|
276
276
|
|
277
277
|
else:
|
278
278
|
model = joblib.load(model_path)
|
279
|
-
|
280
|
-
|
279
|
+
|
280
|
+
#Predict goal
|
281
|
+
data['xG'] = model.predict(xgb_matrix)
|
282
|
+
data['xG'] = np.where(data['event_type'].isin(fenwick_events),data['xG'],np.nan)
|
283
|
+
|
284
|
+
#Avoid merging errors
|
285
|
+
merge_col = ['game_id','period','seconds_elapsed','event_type','event_team_abbr','event_player_1_id']
|
286
|
+
|
287
|
+
for df in [pbp,data]:
|
288
|
+
df = df.astype({
|
289
|
+
'game_id':'int',
|
290
|
+
'period':'int',
|
291
|
+
'seconds_elapsed':'int',
|
292
|
+
'event_type':'str',
|
293
|
+
'event_team_abbr':'str',
|
294
|
+
'event_player_1_id':'float'
|
295
|
+
})
|
296
|
+
|
297
|
+
#Drop previous xG if it exists
|
298
|
+
try: pbp = pbp.drop(columns=['xG'])
|
299
|
+
except KeyError:
|
300
|
+
''
|
301
|
+
|
302
|
+
#Merge
|
303
|
+
data = data[merge_col+['xG']]
|
304
|
+
pbp_xg = pd.merge(pbp,data,how='left')
|
305
|
+
|
306
|
+
return pbp_xg
|
281
307
|
|
282
308
|
def moneypuck_xG(pbp,repo_path = "tools/xg_model/moneypuck/shots_2007-2023.zip"):
|
283
309
|
#Given play-by-play, return itself with xG column sourced from MoneyPuck.com
|
wsba_hockey/workspace.py
CHANGED
@@ -8,6 +8,7 @@ from gspread_pandas import Spread, Client
|
|
8
8
|
import urllib.request
|
9
9
|
from wand.color import Color
|
10
10
|
from wand.image import Image
|
11
|
+
from tools.xg_model import wsba_xG
|
11
12
|
|
12
13
|
season_load = wsba.repo_load_seasons()
|
13
14
|
|
@@ -21,6 +22,13 @@ def workspace(seasons,type,arg = '',start='',end=''):
|
|
21
22
|
data['pbp'].to_csv(f'pbp/nhl_pbp_{season}.csv',index=False)
|
22
23
|
print(f'Errors: {errors}')
|
23
24
|
|
25
|
+
elif type == 'pbp_xg':
|
26
|
+
#Add xG to pbp
|
27
|
+
for season in seasons:
|
28
|
+
print(f'WSBA xG for {season}')
|
29
|
+
data = pd.read_parquet(f'pbp/parquet/nhl_pbp_{season}.parquet')
|
30
|
+
wsba_xG(data).to_parquet(f'pbp/parquet/nhl_pbp_{season}.parquet',index=False)
|
31
|
+
|
24
32
|
elif type == 'convert':
|
25
33
|
for season in seasons:
|
26
34
|
data = pd.read_csv(f"pbp/nhl_pbp_{season}.csv")
|
@@ -40,20 +48,26 @@ def workspace(seasons,type,arg = '',start='',end=''):
|
|
40
48
|
|
41
49
|
elif type == 'stats':
|
42
50
|
#Stats building
|
43
|
-
|
44
|
-
|
45
|
-
for
|
51
|
+
for group in ['skater','team']:
|
52
|
+
stats = []
|
53
|
+
for season in seasons:
|
46
54
|
pbp = pd.read_parquet(f'pbp/parquet/nhl_pbp_{season}.parquet')
|
47
55
|
stat = wsba.nhl_calculate_stats(pbp,group,[2],['5v5'],shot_impact=True)
|
48
56
|
stat.to_csv(f'stats/{group}/wsba_nhl_{season}_{group}.csv',index=False)
|
49
|
-
stats.append(stat)
|
50
|
-
|
51
|
-
|
57
|
+
stats.append(stat)
|
58
|
+
|
59
|
+
if arg:
|
60
|
+
pd.concat(stats).to_csv(f'stats/db/wsba_nhl_{group}_db.csv',index=False)
|
61
|
+
|
62
|
+
elif type == 'xg_model':
|
63
|
+
data = pd.concat([pd.read_parquet(f'pbp/parquet/nhl_pbp_{season}.parquet') for season in seasons])
|
64
|
+
wsba.wsba_main.wsba_xG(data,True,True)
|
65
|
+
|
52
66
|
elif type == 'plot_game':
|
53
67
|
for season in seasons:
|
54
68
|
pbp = wsba.nhl_scrape_season(season,remove=[],start=start,end=end)
|
55
69
|
|
56
|
-
plots = wsba.nhl_plot_games(pbp,wsba.wsba_main.fenwick_events,['5v5'],'all',team_colors=arg,legend=
|
70
|
+
plots = wsba.nhl_plot_games(pbp,wsba.wsba_main.fenwick_events,['5v5'],'all',team_colors=arg,legend=False,xg='wsba')
|
57
71
|
|
58
72
|
games = list(pbp['game_id'].astype(str).drop_duplicates())
|
59
73
|
i = 1
|
@@ -111,4 +125,6 @@ def push_to_sheet():
|
|
111
125
|
spread.df_to_sheet(country,index=False,sheet='Country Info')
|
112
126
|
spread.df_to_sheet(schedule,index=False,sheet='Schedule')
|
113
127
|
|
114
|
-
|
128
|
+
print('Done.')
|
129
|
+
|
130
|
+
workspace(season_load[6:18],,Tru)
|
wsba_hockey/wsba_main.py
CHANGED
@@ -541,15 +541,15 @@ def nhl_shooting_impacts(agg,team=False):
|
|
541
541
|
pos['RushF/60'] = (pos['RushF']/pos['TOI'])*60
|
542
542
|
pos['RushA/60'] = (pos['RushA']/pos['TOI'])*60
|
543
543
|
pos['Rushes FF'] = pos['RushF/60'].rank(pct=True)
|
544
|
-
pos['Rushes FA'] = pos['RushA/60'].rank(pct=True)
|
544
|
+
pos['Rushes FA'] = 1 - pos['RushA/60'].rank(pct=True)
|
545
545
|
pos['RushFxG/60'] = (pos['RushFxG']/pos['TOI'])*60
|
546
546
|
pos['RushAxG/60'] = (pos['RushAxG']/pos['TOI'])*60
|
547
547
|
pos['Rushes xGF'] = pos['RushFxG/60'].rank(pct=True)
|
548
|
-
pos['Rushes xGA'] = pos['RushAxG/60'].rank(pct=True)
|
548
|
+
pos['Rushes xGA'] = 1 - pos['RushAxG/60'].rank(pct=True)
|
549
549
|
pos['RushFG/60'] = (pos['RushFG']/pos['TOI'])*60
|
550
550
|
pos['RushAG/60'] = (pos['RushAG']/pos['TOI'])*60
|
551
551
|
pos['Rushes GF'] = pos['RushFG/60'].rank(pct=True)
|
552
|
-
pos['Rushes GA'] = pos['RushAG/60'].rank(pct=True)
|
552
|
+
pos['Rushes GA'] = 1 - pos['RushAG/60'].rank(pct=True)
|
553
553
|
|
554
554
|
#Flip against metric percentiles
|
555
555
|
pos['ODEF-SR'] = 1-pos['ODEF-SR']
|
@@ -642,7 +642,7 @@ def nhl_calculate_stats(pbp,type,season_types,game_strength,roster_path="rosters
|
|
642
642
|
# param 'xg' - xG model to apply to pbp for aggregation
|
643
643
|
# param 'shot_impact' - boolean determining if the shot impact model will be applied to the dataset
|
644
644
|
|
645
|
-
print(f"Calculating statistics for all games in the provided play-by-play data...\nSeasons included: {pbp['season'].drop_duplicates().to_list()}...")
|
645
|
+
print(f"Calculating statistics for all games in the provided play-by-play data for {type}s...\nSeasons included: {pbp['season'].drop_duplicates().to_list()}...")
|
646
646
|
start = time.perf_counter()
|
647
647
|
|
648
648
|
#Add extra data and apply team changes
|
@@ -674,6 +674,9 @@ def nhl_calculate_stats(pbp,type,season_types,game_strength,roster_path="rosters
|
|
674
674
|
if type == 'team':
|
675
675
|
complete = calc_team(pbp)
|
676
676
|
|
677
|
+
#WSBA
|
678
|
+
complete['WSBA'] = complete['Team']+complete['Season'].astype(str)
|
679
|
+
|
677
680
|
#Set TOI to minute
|
678
681
|
complete['TOI'] = complete['TOI']/60
|
679
682
|
|
@@ -681,6 +684,14 @@ def nhl_calculate_stats(pbp,type,season_types,game_strength,roster_path="rosters
|
|
681
684
|
for stat in per_sixty[7:13]:
|
682
685
|
complete[f'{stat}/60'] = (complete[stat]/complete['TOI'])*60
|
683
686
|
|
687
|
+
#Rank per 60 stats
|
688
|
+
for stat in per_sixty[7:13]:
|
689
|
+
complete[f'{stat}/60 Percentile'] = complete[f'{stat}/60'].rank(pct=True)
|
690
|
+
|
691
|
+
#Flip percentiles for against stats
|
692
|
+
for stat in ['FA','xGA','GA']:
|
693
|
+
complete[f'{stat}/60 Percentile'] = 1-complete[f'{stat}/60 Percentile']
|
694
|
+
|
684
695
|
end = time.perf_counter()
|
685
696
|
length = end-start
|
686
697
|
print(f'...finished in {(length if length <60 else length/60):.2f} {'seconds' if length <60 else 'minutes'}.')
|
@@ -735,6 +746,14 @@ def nhl_calculate_stats(pbp,type,season_types,game_strength,roster_path="rosters
|
|
735
746
|
for stat in per_sixty:
|
736
747
|
complete[f'{stat}/60'] = (complete[stat]/complete['TOI'])*60
|
737
748
|
|
749
|
+
#Rank per 60 stats
|
750
|
+
for stat in per_sixty:
|
751
|
+
complete[f'{stat}/60 Percentile'] = complete[f'{stat}/60'].rank(pct=True)
|
752
|
+
|
753
|
+
#Flip percentiles for against stats
|
754
|
+
for stat in ['FA','xGA','GA']:
|
755
|
+
complete[f'{stat}/60 Percentile'] = 1-complete[f'{stat}/60 Percentile']
|
756
|
+
|
738
757
|
#Add player age
|
739
758
|
complete['Birthday'] = pd.to_datetime(complete['Birthday'])
|
740
759
|
complete['season_year'] = complete['Season'].astype(str).str[4:8].astype(int)
|
@@ -768,7 +787,7 @@ def nhl_calculate_stats(pbp,type,season_types,game_strength,roster_path="rosters
|
|
768
787
|
"GF","FF","xGF","xGF/FF","GF/xGF","FshF%",
|
769
788
|
"GA","FA","xGA","xGA/FA","GA/xGA","FshA%",
|
770
789
|
'Rush',"Rush xG",'Rush G',"GC%","AC%","GI%","FC%","xGC%",
|
771
|
-
]+[f'{stat}/60' for stat in per_sixty]+type_metrics].fillna(0).sort_values(['Player','Season','Team','ID'])
|
790
|
+
]+[f'{stat}/60' for stat in per_sixty]+[f'{stat}/60 Percentile' for stat in per_sixty]+type_metrics].fillna(0).sort_values(['Player','Season','Team','ID'])
|
772
791
|
|
773
792
|
print(f'...finished in {(length if length <60 else length/60):.2f} {'seconds' if length <60 else 'minutes'}.')
|
774
793
|
#Apply shot impacts if necessary (Note: this will remove skaters with fewer than 150 minutes of TOI due to the shot impact TOI rule)
|
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.4
|
2
2
|
Name: wsba_hockey
|
3
|
-
Version: 1.0.
|
3
|
+
Version: 1.0.3
|
4
4
|
Summary: WeakSide Breakout's complete Python package of access to hockey data, primarily including the scraping of National Hockey League schedule, play-by-play, and shifts information.
|
5
5
|
Author-email: Owen Singh <owenbksingh@gmail.com>
|
6
6
|
Project-URL: Homepage, https://github.com/owensingh38/wsba_hockey/
|
@@ -69,7 +69,7 @@ wsba.nhl_scrape_prospects('BOS')
|
|
69
69
|
### Stat Aggregation
|
70
70
|
```python
|
71
71
|
pbp = wsba.nhl_scrape_season('20232024',remove=[], local = True)
|
72
|
-
wsba.nhl_calculate_stats(pbp,'skater',[2],['5v5','4v4','3v3'],xg='
|
72
|
+
wsba.nhl_calculate_stats(pbp,'skater',[2],['5v5','4v4','3v3'],xg='wsba',shot_impact = True)
|
73
73
|
```
|
74
74
|
### Shot Plotting (Plots, Heatmaps, etc.)
|
75
75
|
```python
|
@@ -1,19 +1,19 @@
|
|
1
1
|
wsba_hockey/__init__.py,sha256=QXyc8FFlCDWQuECyyEbj80ASHEbTFj4R13DOFOY9nJg,353
|
2
|
-
wsba_hockey/workspace.py,sha256=
|
3
|
-
wsba_hockey/wsba_main.py,sha256=
|
2
|
+
wsba_hockey/workspace.py,sha256=YQMHCAiCCsJiaSs_MZI_2tKuQp6dmQImIZw-RvjBEhA,5395
|
3
|
+
wsba_hockey/wsba_main.py,sha256=xyZLnOZxzPAi1-n9mBsQ8ThyGJkVotRg5I39SiM_CYs,37886
|
4
4
|
wsba_hockey/stats/calculate_viz/shot_impact.py,sha256=7zxf64yt87YDucUBG31W75u951AUbMC7a3x5ClNIxYI,39
|
5
5
|
wsba_hockey/tools/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
6
6
|
wsba_hockey/tools/agg.py,sha256=WRfuxJt0OgDSEkqHtuGql9nZrQnLGzkm6DUFVbDXayE,9560
|
7
|
-
wsba_hockey/tools/plotting.py,sha256=
|
7
|
+
wsba_hockey/tools/plotting.py,sha256=olPGYJtVFIjBwrOQyz1CNNqaCM6iOJTKjWXBA3egJHM,6022
|
8
8
|
wsba_hockey/tools/scraping.py,sha256=hZv1XMtQjsVSSVwN6Fzw7FuT94zSQGyX1WupTjUjUuU,48548
|
9
|
-
wsba_hockey/tools/xg_model.py,sha256=
|
9
|
+
wsba_hockey/tools/xg_model.py,sha256=ItVWGvYzi3zHA8mPCkUbpTU7NUcQFSw3Xm_nV0E6ypQ,16004
|
10
10
|
wsba_hockey/tools/archive/old_scraping.py,sha256=hEjMI1RtfeZnf0RBiJFI38oXkLZ3WofeH5xqcF4pzgM,49585
|
11
11
|
wsba_hockey/tools/utils/__init__.py,sha256=vccXhOtzARoR99fmEWU1OEI3qCIdQ9Z42AlRA_BUhrs,114
|
12
12
|
wsba_hockey/tools/utils/config.py,sha256=D3Uk05-YTyrhfReMTTLfNI3HN_rON2uo_CDE9oER3Lg,351
|
13
13
|
wsba_hockey/tools/utils/save_pages.py,sha256=CsyL_0n-b-4pJoUauwU3HpnCO6n69-RlBMJQBd_qGDc,4979
|
14
14
|
wsba_hockey/tools/utils/shared.py,sha256=dH_JwZfia5fib8rksy5sW-mBp0pluBPvw37Vdr8Kap0,14211
|
15
|
-
wsba_hockey-1.0.
|
16
|
-
wsba_hockey-1.0.
|
17
|
-
wsba_hockey-1.0.
|
18
|
-
wsba_hockey-1.0.
|
19
|
-
wsba_hockey-1.0.
|
15
|
+
wsba_hockey-1.0.3.dist-info/licenses/LICENSE,sha256=Nr_Um1Pd5FQJTWWgm7maZArdtYMbDhzXYSwyJIZDGik,1114
|
16
|
+
wsba_hockey-1.0.3.dist-info/METADATA,sha256=Cds728R4Mz7RqNuOA0mnMciv4wPUjkNShlPScxaKclw,3542
|
17
|
+
wsba_hockey-1.0.3.dist-info/WHEEL,sha256=pxyMxgL8-pra_rKaQ4drOZAegBVuX-G_4nRHjjgWbmo,91
|
18
|
+
wsba_hockey-1.0.3.dist-info/top_level.txt,sha256=acU7s3x-RZC1zGiqCOmO0g267iqCg34lzIfdmYxxGmQ,12
|
19
|
+
wsba_hockey-1.0.3.dist-info/RECORD,,
|
File without changes
|
File without changes
|
File without changes
|