wsba-hockey 0.1.2__py3-none-any.whl → 1.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
wsba_hockey/__init__.py CHANGED
@@ -1 +1 @@
1
- from wsba_hockey.wsba_main import nhl_scrape_game,nhl_scrape_schedule,nhl_scrape_season,nhl_scrape_seasons_info,nhl_scrape_standings,nhl_scrape_roster,nhl_scrape_player_info,repo_load_rosters,repo_load_schedule,repo_load_teaminfo
1
+ from .wsba_main import nhl_scrape_game,nhl_scrape_schedule,nhl_scrape_season,nhl_scrape_seasons_info,nhl_scrape_standings,nhl_scrape_roster,nhl_scrape_draft_rankings,nhl_scrape_prospects,nhl_calculate_stats,nhl_shooting_impacts,nhl_plot_skaters_shots,nhl_plot_games,repo_load_rosters,repo_load_schedule,repo_load_teaminfo,repo_load_pbp,repo_load_seasons
@@ -0,0 +1,2 @@
1
+ import pandas as pd
2
+ import numpy as np
File without changes
@@ -0,0 +1,185 @@
1
+ import pandas as pd
2
+ import numpy as np
3
+ from .xg_model import *
4
+
5
+ ## AGGREGATE FUNCTIONS ##
6
+
7
+ ## GLOBAL VARIABLES ##
8
+ shot_types = ['wrist','deflected','tip-in','slap','backhand','snap','wrap-around','poke','bat','cradle','between-legs']  # shot-type labels; calc_indv builds one Gi/Fi/xGi column set per entry
9
+ fenwick_events = ['missed-shot','shot-on-goal','goal']  # event types counted as FF/FA in calc_onice and calc_team
10
+
11
def calc_indv(pbp):
    """Aggregate individual scoring and shooting stats per player, team and season.

    Parameters
    ----------
    pbp : pandas.DataFrame
        Play-by-play events.  Columns read: ``event_type``,
        ``event_player_1_id``, ``event_player_2_id``, ``event_player_3_id``,
        ``event_team_abbr``, ``season``, ``xG``, ``rush_mod`` and
        ``shot_type``.  The frame is not modified.

    Returns
    -------
    pandas.DataFrame
        One row per (``ID``, ``Team``, ``Season``) with, in order:

        * ``Gi``, ``Fi``, ``xGi``, ``Rush`` - goals, shot events, summed xG and
          number of shot events with ``rush_mod > 0`` credited to
          ``event_player_1_id``;
        * ``Rush G``, ``Rush xG`` - goals / summed xG on those rush events;
        * ``A1``, ``A2`` - goals on which the player is ``event_player_2_id``
          / ``event_player_3_id``;
        * ``<Type>Gi``, ``<Type>Fi``, ``<Type>xGi`` for every entry of the
          module-level ``shot_types`` (``<Type>`` is ``str.capitalize()`` of
          the entry);
        * ``P1``, ``P``, ``xGi/Fi``, ``Gi/xGi``, ``Fshi%``.

        Only ``Gi``, ``A1`` and ``A2`` are zero-filled; every other column is
        NaN for a player that has no matching rows.
    """
    shot_events = ["goal", "shot-on-goal", "missed-shot"]

    def _per_player(events, player_col, **named_aggs):
        # Group by (player, team, season) and expose the keys as ID/Team/Season.
        return (
            events.groupby([player_col, 'event_team_abbr', 'season'])
            .agg(**named_aggs)
            .reset_index()
            .rename(columns={player_col: 'ID',
                             'event_team_abbr': 'Team',
                             'season': 'Season'})
        )

    # Filter once and work on a private copy so the caller's frame is never
    # touched.  0/1 flag columns let every tally below be a plain vectorised
    # sum/count instead of a Python lambda evaluated once per group.
    shots = pbp.loc[pbp['event_type'].isin(shot_events)].copy()
    shots['_goal'] = (shots['event_type'] == "goal").astype(int)
    shots['_rush'] = (shots['rush_mod'] > 0).astype(int)
    goals = shots.loc[shots['_goal'] == 1]

    indv = _per_player(
        shots, 'event_player_1_id',
        Gi=('_goal', 'sum'),
        Fi=('_goal', 'count'),   # every row left after the filter is a shot event
        xGi=('xG', 'sum'),
        Rush=('_rush', 'sum'),
    )

    rush_xg = _per_player(
        shots.loc[shots['_rush'] == 1], 'event_player_1_id',
        Rush_G=('_goal', 'sum'),
        Rush_xG=('xG', 'sum'),
    ).rename(columns={'Rush_G': 'Rush G', 'Rush_xG': 'Rush xG'})

    a1 = _per_player(goals, 'event_player_2_id', A1=('event_type', 'count'))
    a2 = _per_player(goals, 'event_player_3_id', A2=('event_type', 'count'))

    keys = ['ID', 'Team', 'Season']
    for extra in (rush_xg, a1, a2):
        indv = pd.merge(indv, extra, how='outer', on=keys)

    # Shot types
    for shot_type in shot_types:
        label = shot_type.capitalize()
        by_type = _per_player(
            shots.loc[shots['shot_type'] == shot_type], 'event_player_1_id',
            Gi=('_goal', 'sum'),
            Fi=('_goal', 'count'),
            xGi=('xG', 'sum'),
        ).rename(columns={
            'Gi': f'{label}Gi',
            'Fi': f'{label}Fi',
            'xGi': f'{label}xGi',
        })
        indv = pd.merge(indv, by_type, how='outer', on=keys)

    # Players that only appear as assisters (or only as shooters) get NaN from
    # the outer merges; the point totals below need real zeros.
    indv[['Gi', 'A1', 'A2']] = indv[['Gi', 'A1', 'A2']].fillna(0)

    indv['P1'] = indv['Gi'] + indv['A1']
    indv['P'] = indv['P1'] + indv['A2']
    indv['xGi/Fi'] = indv['xGi'] / indv['Fi']
    indv['Gi/xGi'] = indv['Gi'] / indv['xGi']
    indv['Fshi%'] = indv['Gi'] / indv['Fi']

    return indv
69
+
70
def calc_onice(pbp):
    """Aggregate on-ice for/against stats per player, team and season.

    Every event is credited to each player listed in the
    ``home_on_1_id`` .. ``home_on_6_id`` and ``away_on_1_id`` ..
    ``away_on_6_id`` columns of that row.  A slot holding a missing value
    contributes no player, whichever slot it is.

    Parameters
    ----------
    pbp : pandas.DataFrame
        Play-by-play events.  Besides the twelve on-ice columns, the columns
        read are ``season``, ``game_id``, ``home_team_abbr``,
        ``away_team_abbr``, ``event_type``, ``event_team_abbr``,
        ``event_length`` and ``xG``.  The frame is not modified.

    Returns
    -------
    pandas.DataFrame
        One row per (``ID``, ``Team``, ``Season``) with ``GP`` (distinct
        ``game_id`` values), ``TOI`` (summed ``event_length``), ``FF``/``FA``
        (events whose type is in the module-level ``fenwick_events``),
        ``GF``/``GA``, ``xGF``/``xGA`` and the ratios ``xGF/FF``, ``GF/xGF``,
        ``FshF%``, ``xGA/FA``, ``GA/xGA``, ``FshA%``.  ``ID`` is the string
        form of the value found in the on-ice column (``astype(str)``).
    """
    stat_cols = ['GP', 'TOI', 'FF', 'FA', 'GF', 'GA', 'xGF', 'xGA']

    def _side_stats(side, team_col, opp_col):
        # Build one row per (event, on-ice player) for one side of the game.
        base = pbp[['season', 'game_id', team_col, opp_col, 'event_type',
                    'event_team_abbr', 'event_length', 'xG']]
        per_slot = []
        for slot in range(1, 7):
            slot_ids = pbp[f'{side}_on_{slot}_id']
            # Decide "missing" before converting to text so an empty slot is
            # dropped regardless of its position (including slot 1).
            occupied = slot_ids.notna().to_numpy()
            rows = base.loc[occupied].copy()
            rows['ID'] = slot_ids[occupied].astype(str).to_numpy()
            per_slot.append(rows)
        long = pd.concat(per_slot, ignore_index=True).rename(columns={'season': 'Season'})

        by_team = long['event_team_abbr'] == long[team_col]
        by_opp = long['event_team_abbr'] == long[opp_col]
        is_goal = long['event_type'] == "goal"
        is_fenwick = long['event_type'].isin(fenwick_events)

        long['xGF'] = np.where(by_team, long['xG'], 0)
        long['xGA'] = np.where(by_opp, long['xG'], 0)
        long['GF'] = np.where(is_goal & by_team, 1, 0)
        long['GA'] = np.where(is_goal & by_opp, 1, 0)
        long['FF'] = np.where(is_fenwick & by_team, 1, 0)
        long['FA'] = np.where(is_fenwick & by_opp, 1, 0)

        stats = long.groupby(['ID', team_col, 'Season']).agg(
            GP=('game_id', 'nunique'),
            TOI=('event_length', 'sum'),
            FF=('FF', 'sum'),
            FA=('FA', 'sum'),
            GF=('GF', 'sum'),
            GA=('GA', 'sum'),
            xGF=('xGF', 'sum'),
            xGA=('xGA', 'sum'),
        ).reset_index()

        return stats.rename(columns={team_col: "Team"})

    home_stats = _side_stats('home', 'home_team_abbr', 'away_team_abbr')
    away_stats = _side_stats('away', 'away_team_abbr', 'home_team_abbr')

    # A player's home and away rows for the same team/season are added together.
    onice_stats = (
        pd.concat([home_stats, away_stats])
        .groupby(['ID', 'Team', 'Season'])[stat_cols]
        .sum()
        .reset_index()
    )

    onice_stats['xGF/FF'] = onice_stats['xGF'] / onice_stats['FF']
    onice_stats['GF/xGF'] = onice_stats['GF'] / onice_stats['xGF']
    onice_stats['FshF%'] = onice_stats['GF'] / onice_stats['FF']
    onice_stats['xGA/FA'] = onice_stats['xGA'] / onice_stats['FA']
    onice_stats['GA/xGA'] = onice_stats['GA'] / onice_stats['xGA']
    onice_stats['FshA%'] = onice_stats['GA'] / onice_stats['FA']

    return onice_stats
126
+
127
def calc_team(pbp):
    """Aggregate for/against stats per team and season.

    Each event is tallied twice: once from the away team's point of view and
    once from the home team's, then both views are summed per team/season.

    Parameters
    ----------
    pbp : pandas.DataFrame
        Play-by-play events.  Columns read: ``event_type``,
        ``event_team_abbr``, ``home_team_abbr``, ``away_team_abbr``,
        ``season``, ``game_id``, ``event_length``, ``xG`` and ``rush_mod``.
        The frame is not modified.

    Returns
    -------
    pandas.DataFrame
        One row per (``Team``, ``Season``) with ``GP`` (distinct ``game_id``
        values), ``TOI`` (summed ``event_length``), ``FF``/``FA`` (events whose
        type is in the module-level ``fenwick_events``), ``GF``/``GA``,
        ``xGF``/``xGA``, the rush splits ``RushF``/``RushA``,
        ``RushFxG``/``RushAxG``, ``RushFG``/``RushAG`` (events with
        ``rush_mod > 0``) and the ratios ``xGF/FF``, ``GF/xGF``, ``FshF%``,
        ``xGA/FA``, ``GA/xGA``, ``FshA%``.
    """
    flag_cols = ['FF', 'FA', 'GF', 'GA', 'xGF', 'xGA',
                 'RushF', 'RushA', 'RushFxG', 'RushAxG', 'RushFG', 'RushAG']
    stat_cols = ['GP', 'TOI'] + flag_cols

    # Single aggregation spec shared by both perspectives.
    named_aggs = {'GP': ('game_id', 'nunique'), 'TOI': ('event_length', 'sum')}
    named_aggs.update({col: (col, 'sum') for col in flag_cols})

    # Masks that do not depend on the perspective are computed once.
    is_goal = pbp['event_type'] == "goal"
    is_fenwick = pbp['event_type'].isin(fenwick_events)
    is_rush = pbp['rush_mod'] > 0

    teams = []
    for side, opp in (('away', 'home'), ('home', 'away')):
        team_col = f'{side}_team_abbr'
        by_team = pbp['event_team_abbr'] == pbp[team_col]
        by_opp = pbp['event_team_abbr'] == pbp[f'{opp}_team_abbr']

        # The flag columns live on a slim derived frame, so columns of the
        # same name on the caller's DataFrame are neither created nor clobbered.
        flagged = pbp[[team_col, 'season', 'game_id', 'event_length']].assign(
            xGF=np.where(by_team, pbp['xG'], 0),
            xGA=np.where(by_opp, pbp['xG'], 0),
            GF=np.where(is_goal & by_team, 1, 0),
            GA=np.where(is_goal & by_opp, 1, 0),
            FF=np.where(is_fenwick & by_team, 1, 0),
            FA=np.where(is_fenwick & by_opp, 1, 0),
            RushF=np.where(by_team & is_rush, 1, 0),
            RushA=np.where(by_opp & is_rush, 1, 0),
            RushFxG=np.where(by_team & is_rush, pbp['xG'], 0),
            RushAxG=np.where(by_opp & is_rush, pbp['xG'], 0),
            RushFG=np.where(is_goal & by_team & is_rush, 1, 0),
            RushAG=np.where(is_goal & by_opp & is_rush, 1, 0),
        )

        stats = (
            flagged.groupby([team_col, 'season'])
            .agg(**named_aggs)
            .reset_index()
            .rename(columns={team_col: "Team", 'season': "Season"})
        )
        teams.append(stats)

    onice_stats = (
        pd.concat(teams)
        .groupby(['Team', 'Season'])[stat_cols]
        .sum()
        .reset_index()
    )

    onice_stats['xGF/FF'] = onice_stats['xGF'] / onice_stats['FF']
    onice_stats['GF/xGF'] = onice_stats['GF'] / onice_stats['xGF']
    onice_stats['FshF%'] = onice_stats['GF'] / onice_stats['FF']
    onice_stats['xGA/FA'] = onice_stats['xGA'] / onice_stats['FA']
    onice_stats['GA/xGA'] = onice_stats['GA'] / onice_stats['xGA']
    onice_stats['FshA%'] = onice_stats['GA'] / onice_stats['FA']

    return onice_stats