wsba-hockey 0.1.2__py3-none-any.whl → 1.0.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- wsba_hockey/__init__.py +1 -1
- wsba_hockey/stats/calculate_viz/shot_impact.py +2 -0
- wsba_hockey/tools/__init__.py +0 -0
- wsba_hockey/tools/agg.py +185 -0
- wsba_hockey/tools/archive/old_scraping.py +1104 -0
- wsba_hockey/tools/plotting.py +113 -0
- wsba_hockey/tools/scraping.py +836 -369
- wsba_hockey/tools/utils/__init__.py +1 -0
- wsba_hockey/tools/utils/config.py +14 -0
- wsba_hockey/tools/utils/save_pages.py +133 -0
- wsba_hockey/tools/utils/shared.py +450 -0
- wsba_hockey/tools/xg_model.py +275 -47
- wsba_hockey/wsba_main.py +699 -132
- {wsba_hockey-0.1.2.dist-info → wsba_hockey-1.0.0.dist-info}/METADATA +42 -11
- wsba_hockey-1.0.0.dist-info/RECORD +18 -0
- {wsba_hockey-0.1.2.dist-info → wsba_hockey-1.0.0.dist-info}/WHEEL +1 -1
- wsba_hockey-0.1.2.dist-info/RECORD +0 -9
- {wsba_hockey-0.1.2.dist-info → wsba_hockey-1.0.0.dist-info}/licenses/LICENSE +0 -0
- {wsba_hockey-0.1.2.dist-info → wsba_hockey-1.0.0.dist-info}/top_level.txt +0 -0
wsba_hockey/__init__.py
CHANGED
@@ -1 +1 @@
|
|
1
|
-
from
|
1
|
+
from .wsba_main import nhl_scrape_game,nhl_scrape_schedule,nhl_scrape_season,nhl_scrape_seasons_info,nhl_scrape_standings,nhl_scrape_roster,nhl_scrape_draft_rankings,nhl_scrape_prospects,nhl_calculate_stats,nhl_shooting_impacts,nhl_plot_skaters_shots,nhl_plot_games,repo_load_rosters,repo_load_schedule,repo_load_teaminfo,repo_load_pbp,repo_load_seasons
|
File without changes
|
wsba_hockey/tools/agg.py
ADDED
@@ -0,0 +1,185 @@
|
|
1
|
+
import pandas as pd
|
2
|
+
import numpy as np
|
3
|
+
from .xg_model import *
|
4
|
+
|
5
|
+
## AGGREGATE FUNCTIONS ##
|
6
|
+
|
7
|
+
## GLOBAL VARIABLES ##
|
8
|
+
# Shot-type labels; calc_indv iterates this list to build one set of
# per-type columns (prefixed with the capitalized label) for each entry.
shot_types = [
    'wrist',
    'deflected',
    'tip-in',
    'slap',
    'backhand',
    'snap',
    'wrap-around',
    'poke',
    'bat',
    'cradle',
    'between-legs',
]

# Event types counted toward the FF/FA (Fenwick) totals.
fenwick_events = [
    'missed-shot',
    'shot-on-goal',
    'goal',
]
|
10
|
+
|
11
|
+
def calc_indv(pbp):
    """Aggregate individual shooting and scoring totals per player/team/season.

    Parameters
    ----------
    pbp : pandas.DataFrame
        Play-by-play events. Columns read here: ``event_type``,
        ``event_player_1_id``, ``event_player_2_id``, ``event_player_3_id``,
        ``event_team_abbr``, ``season``, ``xG``, ``rush_mod`` and
        ``shot_type``.

    Returns
    -------
    pandas.DataFrame
        One row per ``ID`` / ``Team`` / ``Season`` with, in order:

        * ``Gi``, ``Fi``, ``xGi``, ``Rush`` - goals, Fenwick events, summed
          xG and number of events with ``rush_mod > 0`` credited to
          ``event_player_1_id``;
        * ``Rush G``, ``Rush xG`` - goals and xG restricted to those
          ``rush_mod > 0`` events;
        * ``A1``, ``A2`` - number of goals on which the player appears as
          ``event_player_2_id`` / ``event_player_3_id``;
        * ``<Type>Gi``, ``<Type>Fi``, ``<Type>xGi`` for every entry of
          ``shot_types`` (``<Type>`` is ``str.capitalize()`` of the label);
        * ``P1``, ``P``, ``xGi/Fi``, ``Gi/xGi``, ``Fshi%`` derived columns.

        Only ``Gi``, ``A1`` and ``A2`` are zero-filled; every other column
        stays NaN for players missing from the corresponding subset.
    """
    id_cols = ['ID', 'Team', 'Season']
    shooter_keys = ['event_player_1_id', 'event_team_abbr', 'season']

    def _tidy(grouped, player_col):
        # Flatten the group index and switch to the output column names.
        return grouped.reset_index().rename(columns={
            player_col: 'ID',
            'event_team_abbr': 'Team',
            'season': 'Season',
        })

    def _goals(event_types):
        return (event_types == 'goal').sum()

    def _rush_events(mods):
        return (mods > 0).sum()

    # Every subset below is drawn from the same two filters.
    attempts = pbp.loc[pbp['event_type'].isin(fenwick_events)]
    goals = pbp.loc[pbp['event_type'] == 'goal']

    indv = _tidy(
        attempts.groupby(shooter_keys).agg(
            Gi=('event_type', _goals),
            # Rows are already restricted to Fenwick events, so the group
            # size is the Fenwick count.
            Fi=('event_type', 'count'),
            xGi=('xG', 'sum'),
            Rush=('rush_mod', _rush_events),
        ),
        'event_player_1_id',
    )

    rush = _tidy(
        attempts.loc[attempts['rush_mod'] > 0].groupby(shooter_keys).agg(
            Rush_G=('event_type', _goals),
            Rush_xG=('xG', 'sum'),
        ),
        'event_player_1_id',
    ).rename(columns={'Rush_G': 'Rush G', 'Rush_xG': 'Rush xG'})

    # Frames merged onto the shooter totals, in output-column order.
    extras = [rush]

    # Goals counted for the second and third listed players.
    for label, player_col in (('A1', 'event_player_2_id'),
                              ('A2', 'event_player_3_id')):
        counted = goals.groupby([player_col, 'event_team_abbr', 'season']).agg(
            **{label: ('event_type', 'count')}
        )
        extras.append(_tidy(counted, player_col))

    # Shot Types
    for shot_type in shot_types:
        prefix = shot_type.capitalize()
        typed = attempts.loc[attempts['shot_type'] == shot_type]
        per_type = _tidy(
            typed.groupby(shooter_keys).agg(
                Gi=('event_type', _goals),
                Fi=('event_type', 'count'),
                xGi=('xG', 'sum'),
            ),
            'event_player_1_id',
        ).rename(columns={
            'Gi': f'{prefix}Gi',
            'Fi': f'{prefix}Fi',
            'xGi': f'{prefix}xGi',
        })
        extras.append(per_type)

    for extra in extras:
        indv = pd.merge(indv, extra, how='outer', on=id_cols)

    indv[['Gi', 'A1', 'A2']] = indv[['Gi', 'A1', 'A2']].fillna(0)

    indv['P1'] = indv['Gi'] + indv['A1']
    indv['P'] = indv['P1'] + indv['A2']
    indv['xGi/Fi'] = indv['xGi'] / indv['Fi']
    indv['Gi/xGi'] = indv['Gi'] / indv['xGi']
    indv['Fshi%'] = indv['Gi'] / indv['Fi']

    return indv
|
69
|
+
|
70
|
+
def calc_onice(pbp):
    """Aggregate on-ice for/against totals per player, team and season.

    Parameters
    ----------
    pbp : pandas.DataFrame
        Play-by-play events. Columns read here: ``home_on_1_id`` ...
        ``home_on_6_id``, ``away_on_1_id`` ... ``away_on_6_id``, ``season``,
        ``game_id``, ``event_num``, ``home_team_abbr``, ``away_team_abbr``,
        ``event_type``, ``event_team_abbr``, ``event_length`` and ``xG``.

        Side effect: the columns ``home_on_ice`` and ``away_on_ice``
        (``';'``-joined id strings) are written onto ``pbp`` itself.

    Returns
    -------
    pandas.DataFrame
        One row per ``ID`` / ``Team`` / ``Season`` with ``GP``, ``TOI``,
        ``FF``, ``FA``, ``GF``, ``GA``, ``xGF``, ``xGA`` and the ratios
        ``xGF/FF``, ``GF/xGF``, ``FshF%``, ``xGA/FA``, ``GA/xGA``, ``FshA%``.
        ``ID`` holds the string form of the on-ice id values, because the
        ids are joined and split as text.
    """
    # Convert player on-ice columns to ';'-joined strings
    for venue in ('home', 'away'):
        joined = pbp[f'{venue}_on_1_id'].astype(str)
        for slot in range(2, 7):
            joined = joined + ";" + pbp[f'{venue}_on_{slot}_id'].astype(str)
        # Remove NA players: a missing id stringifies to 'nan'. This only
        # strips tokens after the first slot; a leading 'nan' is discarded
        # later, once the string has been split into one row per player.
        pbp[f'{venue}_on_ice'] = joined.str.replace(';nan', '', regex=True)

    summed = ['FF', 'FA', 'GF', 'GA', 'xGF', 'xGA']

    def _skater_totals(df, on_ice_col, team_col, opp_col):
        # One row per (event, on-ice player) for the given side.
        df = df[['season', 'game_id', 'event_num', team_col, opp_col,
                 'event_type', 'event_team_abbr', on_ice_col,
                 'event_length', 'xG']].copy()
        df[on_ice_col] = df[on_ice_col].str.split(';')
        df = df.explode(on_ice_col)
        df = df.rename(columns={on_ice_col: 'ID', 'season': 'Season'})

        # An empty first slot (or a row with no skaters at all) survives the
        # ';nan' replacement as a literal 'nan' token; drop it so it is not
        # aggregated as if it were a player.
        df = df.loc[df['ID'] != 'nan'].copy()

        by_team = df['event_team_abbr'] == df[team_col]
        by_opp = df['event_team_abbr'] == df[opp_col]
        is_goal = df['event_type'] == "goal"
        is_fenwick = df['event_type'].isin(fenwick_events)

        df['xGF'] = np.where(by_team, df['xG'], 0)
        df['xGA'] = np.where(by_opp, df['xG'], 0)
        df['GF'] = np.where(is_goal & by_team, 1, 0)
        df['GA'] = np.where(is_goal & by_opp, 1, 0)
        df['FF'] = np.where(is_fenwick & by_team, 1, 0)
        df['FA'] = np.where(is_fenwick & by_opp, 1, 0)

        stats = df.groupby(['ID', team_col, 'Season']).agg(
            GP=('game_id', 'nunique'),
            TOI=('event_length', 'sum'),
            **{col: (col, 'sum') for col in summed},
        ).reset_index()

        return stats.rename(columns={team_col: "Team"})

    home_stats = _skater_totals(pbp, 'home_on_ice', 'home_team_abbr', 'away_team_abbr')
    away_stats = _skater_totals(pbp, 'away_on_ice', 'away_team_abbr', 'home_team_abbr')

    # Combine the home and away halves of each player/team/season.
    onice_stats = pd.concat([home_stats, away_stats]).groupby(['ID', 'Team', 'Season']).agg(
        **{col: (col, 'sum') for col in ['GP', 'TOI'] + summed}
    ).reset_index()

    onice_stats['xGF/FF'] = onice_stats['xGF'] / onice_stats['FF']
    onice_stats['GF/xGF'] = onice_stats['GF'] / onice_stats['xGF']
    onice_stats['FshF%'] = onice_stats['GF'] / onice_stats['FF']
    onice_stats['xGA/FA'] = onice_stats['xGA'] / onice_stats['FA']
    onice_stats['GA/xGA'] = onice_stats['GA'] / onice_stats['xGA']
    onice_stats['FshA%'] = onice_stats['GA'] / onice_stats['FA']

    return onice_stats
|
126
|
+
|
127
|
+
def calc_team(pbp):
    """Aggregate for/against totals per team and season.

    Each game is tallied twice, once from the away team's side and once
    from the home team's side, and the two halves are then summed per
    ``Team`` / ``Season``.

    Parameters
    ----------
    pbp : pandas.DataFrame
        Play-by-play events. Columns read here: ``home_team_abbr``,
        ``away_team_abbr``, ``event_team_abbr``, ``event_type``, ``season``,
        ``game_id``, ``event_length``, ``xG`` and ``rush_mod``. The frame is
        not modified; the per-event indicator columns are built on a
        working copy.

    Returns
    -------
    pandas.DataFrame
        One row per ``Team`` / ``Season`` with ``GP``, ``TOI``, ``FF``,
        ``FA``, ``GF``, ``GA``, ``xGF``, ``xGA``, ``RushF``, ``RushA``,
        ``RushFxG``, ``RushAxG``, ``RushFG``, ``RushAG`` and the ratios
        ``xGF/FF``, ``GF/xGF``, ``FshF%``, ``xGA/FA``, ``GA/xGA``, ``FshA%``.
    """
    summed = ['FF', 'FA', 'GF', 'GA', 'xGF', 'xGA',
              'RushF', 'RushA', 'RushFxG', 'RushAxG', 'RushFG', 'RushAG']

    # These masks do not depend on which side is being tallied.
    is_goal = pbp['event_type'] == "goal"
    is_fenwick = pbp['event_type'].isin(fenwick_events)
    on_rush = pbp['rush_mod'] > 0

    teams = []
    for side, other in (('away', 'home'), ('home', 'away')):
        team_col = f'{side}_team_abbr'
        by_team = pbp['event_team_abbr'] == pbp[team_col]
        by_opp = pbp['event_team_abbr'] == pbp[f'{other}_team_abbr']

        # Narrow working copy: keeps the indicator columns off the caller's
        # frame, which the two passes would otherwise overwrite in turn.
        work = pbp[[team_col, 'season', 'game_id', 'event_length']].copy()
        work['xGF'] = np.where(by_team, pbp['xG'], 0)
        work['xGA'] = np.where(by_opp, pbp['xG'], 0)
        work['GF'] = np.where(is_goal & by_team, 1, 0)
        work['GA'] = np.where(is_goal & by_opp, 1, 0)
        work['FF'] = np.where(is_fenwick & by_team, 1, 0)
        work['FA'] = np.where(is_fenwick & by_opp, 1, 0)
        work['RushF'] = np.where(by_team & on_rush, 1, 0)
        work['RushA'] = np.where(by_opp & on_rush, 1, 0)
        work['RushFxG'] = np.where(by_team & on_rush, pbp['xG'], 0)
        work['RushAxG'] = np.where(by_opp & on_rush, pbp['xG'], 0)
        work['RushFG'] = np.where(is_goal & by_team & on_rush, 1, 0)
        work['RushAG'] = np.where(is_goal & by_opp & on_rush, 1, 0)

        stats = work.groupby([team_col, 'season']).agg(
            GP=('game_id', 'nunique'),
            TOI=('event_length', 'sum'),
            **{col: (col, 'sum') for col in summed},
        ).reset_index().rename(columns={team_col: "Team", 'season': "Season"})
        teams.append(stats)

    # Sum the away-side and home-side halves for each team and season.
    onice_stats = pd.concat(teams).groupby(['Team', 'Season']).agg(
        **{col: (col, 'sum') for col in ['GP', 'TOI'] + summed}
    ).reset_index()

    onice_stats['xGF/FF'] = onice_stats['xGF'] / onice_stats['FF']
    onice_stats['GF/xGF'] = onice_stats['GF'] / onice_stats['xGF']
    onice_stats['FshF%'] = onice_stats['GF'] / onice_stats['FF']
    onice_stats['xGA/FA'] = onice_stats['xGA'] / onice_stats['FA']
    onice_stats['GA/xGA'] = onice_stats['GA'] / onice_stats['xGA']
    onice_stats['FshA%'] = onice_stats['GA'] / onice_stats['FA']

    return onice_stats
|