wsba-hockey 1.0.2__py3-none-any.whl → 1.0.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -14,9 +14,9 @@ event_markers = {
14
14
  'faceoff':'X',
15
15
  'hit':'P',
16
16
  'blocked-shot':'v',
17
- 'missed-shot':'H',
17
+ 'missed-shot':'o',
18
18
  'shot-on-goal':'D',
19
- 'goal':'o',
19
+ 'goal':'*',
20
20
  'giveaway':'1',
21
21
  'takeaway':'2',
22
22
  }
@@ -135,7 +135,7 @@ def plot_game_events(pbp,game_id,events,strengths,marker_dict=event_markers,team
135
135
 
136
136
  for event in events:
137
137
  plays = pbp.loc[pbp['event_type']==event]
138
- ax.scatter(plays['x_adj'],plays['y_adj'],plays['size'],plays['color'],marker=event_markers[event],label=event,zorder=5)
138
+ ax.scatter(plays['x_adj'],plays['y_adj'],plays['size'],plays['color'],marker=event_markers[event],edgecolors='white',label=event,zorder=5)
139
139
 
140
140
  ax.set_title(f'{away_abbr} @ {home_abbr} - {date}')
141
141
  ax.legend(bbox_to_anchor =(0.5,-0.35), loc='lower center',ncol=1).set_visible(legend)
@@ -108,7 +108,7 @@ def wsba_xG(pbp, train = False, overwrite = False, model_path = "tools/xg_model/
108
108
  'prior_faceoff']
109
109
 
110
110
  #Prep Data
111
- pbp = prep_xG_data(pbp)
111
+ pbp_prep = prep_xG_data(pbp)
112
112
  #Filter unwanted data:
113
113
  #Shots must occur in specified events and strength states, occur before the shootout, and have valid coordinates
114
114
  events = ['faceoff','hit','giveaway','takeaway','blocked-shot','missed-shot','shot-on-goal','goal']
@@ -127,12 +127,12 @@ def wsba_xG(pbp, train = False, overwrite = False, model_path = "tools/xg_model/
127
127
  '6v4',
128
128
  '6v5']
129
129
 
130
- data = pbp.loc[(pbp['event_type'].isin(events))&
131
- (pbp['strength_state'].isin(strengths))&
132
- (pbp['period'] < 5)&
133
- (pbp['x_fixed'].notna())&
134
- (pbp['y_fixed'].notna())&
135
- ~((pbp['x_fixed']==0)&(pbp['y_fixed']==0)&(pbp['x_fixed'].isin(fenwick_events))&(pbp['event_distance']!=90))]
130
+ data = pbp_prep.loc[(pbp_prep['event_type'].isin(events))&
131
+ (pbp_prep['strength_state'].isin(strengths))&
132
+ (pbp_prep['period'] < 5)&
133
+ (pbp_prep['x_fixed'].notna())&
134
+ (pbp_prep['y_fixed'].notna())&
135
+ ~((pbp_prep['x_fixed']==0)&(pbp_prep['y_fixed']==0)&(pbp_prep['x_fixed'].isin(fenwick_events))&(pbp_prep['event_distance']!=90))]
136
136
 
137
137
  #Convert to sparse
138
138
  data_sparse = sp.csr_matrix(data[[target]+continous+boolean])
@@ -199,11 +199,11 @@ def wsba_xG(pbp, train = False, overwrite = False, model_path = "tools/xg_model/
199
199
  best_all = best_all.sort_values(by="auc", ascending=False)
200
200
 
201
201
  if overwrite == True:
202
- best_all.to_csv("xg_model/testing/xg_model_training_runs.csv",index=False)
202
+ best_all.to_csv("tools/xg_model/testing/xg_model_training_runs.csv",index=False)
203
203
  else:
204
- best_old = pd.read_csv("xg_model/testing/xg_model_training_runs.csv")
204
+ best_old = pd.read_csv("tools/xg_model/testing/xg_model_training_runs.csv")
205
205
  best_comb = pd.concat([best_old,best_all])
206
- best_comb.to_csv("xg_model/testing/xg_model_training_runs.csv",index=False)
206
+ best_comb.to_csv("tools/xg_model/testing/xg_model_training_runs.csv",index=False)
207
207
 
208
208
  # Final parameters
209
209
  param_7_EV = {
@@ -250,11 +250,11 @@ def wsba_xG(pbp, train = False, overwrite = False, model_path = "tools/xg_model/
250
250
  # Clean results and sort to find the number of rounds to use and seed
251
251
  cv_final = cv_test.sort_values(by="AUC", ascending=False)
252
252
  if overwrite == True:
253
- cv_final.to_csv("xg_model/testing/xg_model_cv_runs.csv",index=False)
253
+ cv_final.to_csv("tools/xg_model/testing/xg_model_cv_runs.csv",index=False)
254
254
  else:
255
- cv_old = pd.read_csv("xg_model/testing/xg_model_cv_runs.csv")
255
+ cv_old = pd.read_csv("tools/xg_model/testing/xg_model_cv_runs.csv")
256
256
  cv_comb = pd.concat([cv_old,cv_final])
257
- cv_comb.to_csv("xg_model/testing/xg_model_cv_runs.csv")
257
+ cv_comb.to_csv("tools/xg_model/testing/xg_model_cv_runs.csv")
258
258
  cv_final.loc[len(cv_final)] = cv_test.mean()
259
259
 
260
260
  # Train the final model
@@ -276,8 +276,34 @@ def wsba_xG(pbp, train = False, overwrite = False, model_path = "tools/xg_model/
276
276
 
277
277
  else:
278
278
  model = joblib.load(model_path)
279
- pbp['xG'] = np.where(pbp['event_type'].isin(fenwick_events),model.predict(xgb_matrix),"")
280
- return pbp
279
+
280
+ #Predict goal
281
+ data['xG'] = model.predict(xgb_matrix)
282
+ data['xG'] = np.where(data['event_type'].isin(fenwick_events),data['xG'],np.nan)
283
+
284
+ #Avoid merging errors
285
+ merge_col = ['game_id','period','seconds_elapsed','event_type','event_team_abbr','event_player_1_id']
286
+
287
+ for df in [pbp,data]:
288
+ df = df.astype({
289
+ 'game_id':'int',
290
+ 'period':'int',
291
+ 'seconds_elapsed':'int',
292
+ 'event_type':'str',
293
+ 'event_team_abbr':'str',
294
+ 'event_player_1_id':'float'
295
+ })
296
+
297
+ #Drop previous xG if it exists
298
+ try: pbp = pbp.drop(columns=['xG'])
299
+ except KeyError:
300
+ ''
301
+
302
+ #Merge
303
+ data = data[merge_col+['xG']]
304
+ pbp_xg = pd.merge(pbp,data,how='left')
305
+
306
+ return pbp_xg
281
307
 
282
308
  def moneypuck_xG(pbp,repo_path = "tools/xg_model/moneypuck/shots_2007-2023.zip"):
283
309
  #Given play-by-play, return itself with xG column sourced from MoneyPuck.com
wsba_hockey/workspace.py CHANGED
@@ -8,6 +8,7 @@ from gspread_pandas import Spread, Client
8
8
  import urllib.request
9
9
  from wand.color import Color
10
10
  from wand.image import Image
11
+ from tools.xg_model import wsba_xG
11
12
 
12
13
  season_load = wsba.repo_load_seasons()
13
14
 
@@ -21,6 +22,13 @@ def workspace(seasons,type,arg = '',start='',end=''):
21
22
  data['pbp'].to_csv(f'pbp/nhl_pbp_{season}.csv',index=False)
22
23
  print(f'Errors: {errors}')
23
24
 
25
+ elif type == 'pbp_xg':
26
+ #Add xG to pbp
27
+ for season in seasons:
28
+ print(f'WSBA xG for {season}')
29
+ data = pd.read_parquet(f'pbp/parquet/nhl_pbp_{season}.parquet')
30
+ wsba_xG(data).to_parquet(f'pbp/parquet/nhl_pbp_{season}.parquet',index=False)
31
+
24
32
  elif type == 'convert':
25
33
  for season in seasons:
26
34
  data = pd.read_csv(f"pbp/nhl_pbp_{season}.csv")
@@ -40,20 +48,26 @@ def workspace(seasons,type,arg = '',start='',end=''):
40
48
 
41
49
  elif type == 'stats':
42
50
  #Stats building
43
- stats = []
44
- for season in seasons:
45
- for group in ['skater','team']:
51
+ for group in ['skater','team']:
52
+ stats = []
53
+ for season in seasons:
46
54
  pbp = pd.read_parquet(f'pbp/parquet/nhl_pbp_{season}.parquet')
47
55
  stat = wsba.nhl_calculate_stats(pbp,group,[2],['5v5'],shot_impact=True)
48
56
  stat.to_csv(f'stats/{group}/wsba_nhl_{season}_{group}.csv',index=False)
49
- stats.append(stat)
50
- pd.concat(stats).to_csv(f'stats/db/wsba_nhl_{group}_db.csv',index=False)
51
-
57
+ stats.append(stat)
58
+
59
+ if arg:
60
+ pd.concat(stats).to_csv(f'stats/db/wsba_nhl_{group}_db.csv',index=False)
61
+
62
+ elif type == 'xg_model':
63
+ data = pd.concat([pd.read_parquet(f'pbp/parquet/nhl_pbp_{season}.parquet') for season in seasons])
64
+ wsba.wsba_main.wsba_xG(data,True,True)
65
+
52
66
  elif type == 'plot_game':
53
67
  for season in seasons:
54
68
  pbp = wsba.nhl_scrape_season(season,remove=[],start=start,end=end)
55
69
 
56
- plots = wsba.nhl_plot_games(pbp,wsba.wsba_main.fenwick_events,['5v5'],'all',team_colors=arg,legend=True)
70
+ plots = wsba.nhl_plot_games(pbp,wsba.wsba_main.fenwick_events,['5v5'],'all',team_colors=arg,legend=False,xg='wsba')
57
71
 
58
72
  games = list(pbp['game_id'].astype(str).drop_duplicates())
59
73
  i = 1
@@ -111,4 +125,6 @@ def push_to_sheet():
111
125
  spread.df_to_sheet(country,index=False,sheet='Country Info')
112
126
  spread.df_to_sheet(schedule,index=False,sheet='Schedule')
113
127
 
114
- workspace(['20242025'],'plot_game',arg={'away':'secondary','home':'primary'}, start='04-20',end='04-20')
128
+ print('Done.')
129
+
130
+ workspace(season_load[6:18],,Tru)
wsba_hockey/wsba_main.py CHANGED
@@ -541,15 +541,15 @@ def nhl_shooting_impacts(agg,team=False):
541
541
  pos['RushF/60'] = (pos['RushF']/pos['TOI'])*60
542
542
  pos['RushA/60'] = (pos['RushA']/pos['TOI'])*60
543
543
  pos['Rushes FF'] = pos['RushF/60'].rank(pct=True)
544
- pos['Rushes FA'] = pos['RushA/60'].rank(pct=True)
544
+ pos['Rushes FA'] = 1 - pos['RushA/60'].rank(pct=True)
545
545
  pos['RushFxG/60'] = (pos['RushFxG']/pos['TOI'])*60
546
546
  pos['RushAxG/60'] = (pos['RushAxG']/pos['TOI'])*60
547
547
  pos['Rushes xGF'] = pos['RushFxG/60'].rank(pct=True)
548
- pos['Rushes xGA'] = pos['RushAxG/60'].rank(pct=True)
548
+ pos['Rushes xGA'] = 1 - pos['RushAxG/60'].rank(pct=True)
549
549
  pos['RushFG/60'] = (pos['RushFG']/pos['TOI'])*60
550
550
  pos['RushAG/60'] = (pos['RushAG']/pos['TOI'])*60
551
551
  pos['Rushes GF'] = pos['RushFG/60'].rank(pct=True)
552
- pos['Rushes GA'] = pos['RushAG/60'].rank(pct=True)
552
+ pos['Rushes GA'] = 1 - pos['RushAG/60'].rank(pct=True)
553
553
 
554
554
  #Flip against metric percentiles
555
555
  pos['ODEF-SR'] = 1-pos['ODEF-SR']
@@ -642,7 +642,7 @@ def nhl_calculate_stats(pbp,type,season_types,game_strength,roster_path="rosters
642
642
  # param 'xg' - xG model to apply to pbp for aggregation
643
643
  # param 'shot_impact' - boolean determining if the shot impact model will be applied to the dataset
644
644
 
645
- print(f"Calculating statistics for all games in the provided play-by-play data...\nSeasons included: {pbp['season'].drop_duplicates().to_list()}...")
645
+ print(f"Calculating statistics for all games in the provided play-by-play data for {type}s...\nSeasons included: {pbp['season'].drop_duplicates().to_list()}...")
646
646
  start = time.perf_counter()
647
647
 
648
648
  #Add extra data and apply team changes
@@ -674,6 +674,9 @@ def nhl_calculate_stats(pbp,type,season_types,game_strength,roster_path="rosters
674
674
  if type == 'team':
675
675
  complete = calc_team(pbp)
676
676
 
677
+ #WSBA
678
+ complete['WSBA'] = complete['Team']+complete['Season'].astype(str)
679
+
677
680
  #Set TOI to minute
678
681
  complete['TOI'] = complete['TOI']/60
679
682
 
@@ -681,6 +684,14 @@ def nhl_calculate_stats(pbp,type,season_types,game_strength,roster_path="rosters
681
684
  for stat in per_sixty[7:13]:
682
685
  complete[f'{stat}/60'] = (complete[stat]/complete['TOI'])*60
683
686
 
687
+ #Rank per 60 stats
688
+ for stat in per_sixty[7:13]:
689
+ complete[f'{stat}/60 Percentile'] = complete[f'{stat}/60'].rank(pct=True)
690
+
691
+ #Flip percentiles for against stats
692
+ for stat in ['FA','xGA','GA']:
693
+ complete[f'{stat}/60 Percentile'] = 1-complete[f'{stat}/60 Percentile']
694
+
684
695
  end = time.perf_counter()
685
696
  length = end-start
686
697
  print(f'...finished in {(length if length <60 else length/60):.2f} {'seconds' if length <60 else 'minutes'}.')
@@ -735,6 +746,14 @@ def nhl_calculate_stats(pbp,type,season_types,game_strength,roster_path="rosters
735
746
  for stat in per_sixty:
736
747
  complete[f'{stat}/60'] = (complete[stat]/complete['TOI'])*60
737
748
 
749
+ #Rank per 60 stats
750
+ for stat in per_sixty:
751
+ complete[f'{stat}/60 Percentile'] = complete[f'{stat}/60'].rank(pct=True)
752
+
753
+ #Flip percentiles for against stats
754
+ for stat in ['FA','xGA','GA']:
755
+ complete[f'{stat}/60 Percentile'] = 1-complete[f'{stat}/60 Percentile']
756
+
738
757
  #Add player age
739
758
  complete['Birthday'] = pd.to_datetime(complete['Birthday'])
740
759
  complete['season_year'] = complete['Season'].astype(str).str[4:8].astype(int)
@@ -768,7 +787,7 @@ def nhl_calculate_stats(pbp,type,season_types,game_strength,roster_path="rosters
768
787
  "GF","FF","xGF","xGF/FF","GF/xGF","FshF%",
769
788
  "GA","FA","xGA","xGA/FA","GA/xGA","FshA%",
770
789
  'Rush',"Rush xG",'Rush G',"GC%","AC%","GI%","FC%","xGC%",
771
- ]+[f'{stat}/60' for stat in per_sixty]+type_metrics].fillna(0).sort_values(['Player','Season','Team','ID'])
790
+ ]+[f'{stat}/60' for stat in per_sixty]+[f'{stat}/60 Percentile' for stat in per_sixty]+type_metrics].fillna(0).sort_values(['Player','Season','Team','ID'])
772
791
 
773
792
  print(f'...finished in {(length if length <60 else length/60):.2f} {'seconds' if length <60 else 'minutes'}.')
774
793
  #Apply shot impacts if necessary (Note: this will remove skaters with fewer than 150 minutes of TOI due to the shot impact TOI rule)
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: wsba_hockey
3
- Version: 1.0.2
3
+ Version: 1.0.3
4
4
  Summary: WeakSide Breakout's complete Python package of access to hockey data, primarily including the scraping of National Hockey League schedule, play-by-play, and shifts information.
5
5
  Author-email: Owen Singh <owenbksingh@gmail.com>
6
6
  Project-URL: Homepage, https://github.com/owensingh38/wsba_hockey/
@@ -69,7 +69,7 @@ wsba.nhl_scrape_prospects('BOS')
69
69
  ### Stat Aggregation
70
70
  ```python
71
71
  pbp = wsba.nhl_scrape_season('20232024',remove=[], local = True)
72
- wsba.nhl_calculate_stats(pbp,'skater',[2],['5v5','4v4','3v3'],xg='moneypuck',shot_impact = True)
72
+ wsba.nhl_calculate_stats(pbp,'skater',[2],['5v5','4v4','3v3'],xg='wsba',shot_impact = True)
73
73
  ```
74
74
  ### Shot Plotting (Plots, Heatmaps, etc.)
75
75
  ```python
@@ -1,19 +1,19 @@
1
1
  wsba_hockey/__init__.py,sha256=QXyc8FFlCDWQuECyyEbj80ASHEbTFj4R13DOFOY9nJg,353
2
- wsba_hockey/workspace.py,sha256=HJ4ZJyL8OwrtknXKAqiptW8WxbJZp3kc0bobDwpKcLY,4875
3
- wsba_hockey/wsba_main.py,sha256=ZYfYpqJeUBnVHAjGrDWkEjoYW_8qWCilYukGuKUkJA4,37073
2
+ wsba_hockey/workspace.py,sha256=YQMHCAiCCsJiaSs_MZI_2tKuQp6dmQImIZw-RvjBEhA,5395
3
+ wsba_hockey/wsba_main.py,sha256=xyZLnOZxzPAi1-n9mBsQ8ThyGJkVotRg5I39SiM_CYs,37886
4
4
  wsba_hockey/stats/calculate_viz/shot_impact.py,sha256=7zxf64yt87YDucUBG31W75u951AUbMC7a3x5ClNIxYI,39
5
5
  wsba_hockey/tools/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
6
6
  wsba_hockey/tools/agg.py,sha256=WRfuxJt0OgDSEkqHtuGql9nZrQnLGzkm6DUFVbDXayE,9560
7
- wsba_hockey/tools/plotting.py,sha256=mOkXO-ZWFTblsGpYSOZFxFJFad4sZBr62prVaatoLEw,6003
7
+ wsba_hockey/tools/plotting.py,sha256=olPGYJtVFIjBwrOQyz1CNNqaCM6iOJTKjWXBA3egJHM,6022
8
8
  wsba_hockey/tools/scraping.py,sha256=hZv1XMtQjsVSSVwN6Fzw7FuT94zSQGyX1WupTjUjUuU,48548
9
- wsba_hockey/tools/xg_model.py,sha256=vdmDq9CWFr-2AKUrnorjUyweUf9_NMdq5Xf3mbPserw,15158
9
+ wsba_hockey/tools/xg_model.py,sha256=ItVWGvYzi3zHA8mPCkUbpTU7NUcQFSw3Xm_nV0E6ypQ,16004
10
10
  wsba_hockey/tools/archive/old_scraping.py,sha256=hEjMI1RtfeZnf0RBiJFI38oXkLZ3WofeH5xqcF4pzgM,49585
11
11
  wsba_hockey/tools/utils/__init__.py,sha256=vccXhOtzARoR99fmEWU1OEI3qCIdQ9Z42AlRA_BUhrs,114
12
12
  wsba_hockey/tools/utils/config.py,sha256=D3Uk05-YTyrhfReMTTLfNI3HN_rON2uo_CDE9oER3Lg,351
13
13
  wsba_hockey/tools/utils/save_pages.py,sha256=CsyL_0n-b-4pJoUauwU3HpnCO6n69-RlBMJQBd_qGDc,4979
14
14
  wsba_hockey/tools/utils/shared.py,sha256=dH_JwZfia5fib8rksy5sW-mBp0pluBPvw37Vdr8Kap0,14211
15
- wsba_hockey-1.0.2.dist-info/licenses/LICENSE,sha256=Nr_Um1Pd5FQJTWWgm7maZArdtYMbDhzXYSwyJIZDGik,1114
16
- wsba_hockey-1.0.2.dist-info/METADATA,sha256=fBFZblpMy7vFKcM9MN4JBjJYSIIljdfVNd4GusrpbKU,3547
17
- wsba_hockey-1.0.2.dist-info/WHEEL,sha256=pxyMxgL8-pra_rKaQ4drOZAegBVuX-G_4nRHjjgWbmo,91
18
- wsba_hockey-1.0.2.dist-info/top_level.txt,sha256=acU7s3x-RZC1zGiqCOmO0g267iqCg34lzIfdmYxxGmQ,12
19
- wsba_hockey-1.0.2.dist-info/RECORD,,
15
+ wsba_hockey-1.0.3.dist-info/licenses/LICENSE,sha256=Nr_Um1Pd5FQJTWWgm7maZArdtYMbDhzXYSwyJIZDGik,1114
16
+ wsba_hockey-1.0.3.dist-info/METADATA,sha256=Cds728R4Mz7RqNuOA0mnMciv4wPUjkNShlPScxaKclw,3542
17
+ wsba_hockey-1.0.3.dist-info/WHEEL,sha256=pxyMxgL8-pra_rKaQ4drOZAegBVuX-G_4nRHjjgWbmo,91
18
+ wsba_hockey-1.0.3.dist-info/top_level.txt,sha256=acU7s3x-RZC1zGiqCOmO0g267iqCg34lzIfdmYxxGmQ,12
19
+ wsba_hockey-1.0.3.dist-info/RECORD,,