wsba-hockey 1.1.3__py3-none-any.whl → 1.1.5__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- wsba_hockey/api/api/index.py +68 -28
- wsba_hockey/data_pipelines.py +3 -2
- wsba_hockey/evidence/weakside-breakout/wsba_nhl_apps/wsba_nhl_apps/duos/app.py +210 -0
- wsba_hockey/evidence/weakside-breakout/wsba_nhl_apps/wsba_nhl_apps/duos/calc.py +163 -0
- wsba_hockey/evidence/weakside-breakout/wsba_nhl_apps/wsba_nhl_apps/line-combos/app.py +245 -0
- wsba_hockey/evidence/weakside-breakout/wsba_nhl_apps/wsba_nhl_apps/line-combos/plot.py +275 -0
- wsba_hockey/evidence/weakside-breakout/wsba_nhl_apps/wsba_nhl_apps/line-combos/rink_plot.py +245 -0
- wsba_hockey/evidence/weakside-breakout/wsba_nhl_apps/wsba_nhl_apps/matchups/app.py +2 -2
- wsba_hockey/evidence/weakside-breakout/wsba_nhl_apps/wsba_nhl_apps/pbp/app.py +1 -0
- wsba_hockey/tools/agg.py +29 -0
- wsba_hockey/tools/scraping.py +168 -4
- wsba_hockey/workspace.py +4 -24
- wsba_hockey/wsba_main.py +22 -18
- {wsba_hockey-1.1.3.dist-info → wsba_hockey-1.1.5.dist-info}/METADATA +9 -5
- {wsba_hockey-1.1.3.dist-info → wsba_hockey-1.1.5.dist-info}/RECORD +18 -13
- {wsba_hockey-1.1.3.dist-info → wsba_hockey-1.1.5.dist-info}/WHEEL +0 -0
- {wsba_hockey-1.1.3.dist-info → wsba_hockey-1.1.5.dist-info}/licenses/LICENSE +0 -0
- {wsba_hockey-1.1.3.dist-info → wsba_hockey-1.1.5.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,245 @@
|
|
1
|
+
|
2
|
+
import numpy as np
|
3
|
+
import plotly.graph_objects as go
|
4
|
+
import io
|
5
|
+
import base64
|
6
|
+
import requests as rs
|
7
|
+
from PIL import Image
|
8
|
+
|
9
|
+
def rink(setting = "full", vertical = False):
    '''
    Function to plot rink in Plotly. Takes 2 arguments :

    setting : full (default) for full ice, offense positive half of the ice, ozone positive quarter of ice, defense for negative half of the ice, dzone for negative quarter of the ice, and neutral for the neutral zone
    vertical : True if you want a vertical rink, False (default) is for an horizontal rink

    Returns the Plotly figure holding the rink markings, the centre-ice logo and a transparent background.

    Raises KeyError when setting is not one of the names listed above.
    '''

    # Axis range shown along the length of the rink for every setting.
    # The same table is used for both orientations so that every documented
    # setting (including "neutral") works for horizontal and vertical rinks.
    setting_dict = {
        "full" : [-101, 101],
        "offense" : [0, 101],
        "ozone" : [25, 101],
        "defense" : [-101, 0],
        "dzone" : [-101, -25],
        "neutral" : [-25, 25]
    }
    if setting not in setting_dict:
        raise KeyError(f"Unknown rink setting {setting!r}; expected one of {list(setting_dict)}")
    length_range = setting_dict[setting]

    def faceoff_circle(x, y, outer=True):
        # Build the traces for one faceoff spot centred on (x, y):
        # the filled dot, preceded by the 15-unit radius ring when outer is True.
        segments = []
        theta = np.linspace(0, 2*np.pi, 300)
        if outer:
            # Outer circle
            x_outer = x + 15*np.cos(theta)
            y_outer = y + 15*np.sin(theta)
            outer_circle = go.Scatter(x=x_outer, y=y_outer, mode='lines', line=dict(width=2, color='red'), showlegend=False, hoverinfo='skip')

            segments.append(outer_circle)

        # Inner circle
        x_inner = x + np.cos(theta)
        y_inner = y + np.sin(theta)
        inner_circle = go.Scatter(x=x_inner, y=y_inner, mode='lines', fill='toself', fillcolor='rgba(255, 0, 0, 0.43)', line=dict(color='rgba(255, 0, 0, 1)', width=2), showlegend=False, hoverinfo='skip')

        segments.append(inner_circle)

        return segments

    fig = go.Figure()

    if vertical :
        fig.update_layout(xaxis=dict(range=[-42.6, 42.6], showgrid=False, zeroline=False, showticklabels=False, constrain="domain"), yaxis=dict(range=length_range, showgrid=False, zeroline=False, showticklabels=False, constrain="domain"),
                          showlegend=False, autosize=True, template="plotly_white")
        fig.update_yaxes(
            scaleanchor="x",
            scaleratio=1,
        )

        def goal_crease(flip=1):
            # Crease outline in front of the goal line at y = 89 (flip=1) or y = -89 (flip=-1).
            x_seq = np.linspace(-4, 4, 100)
            x_goal = np.concatenate(([-4], x_seq, [4]))
            y_goal = flip * np.concatenate(([89], 83 + x_seq**2/4**2*1.5, [89]))
            # Decorative trace: kept out of the legend and hover like the other markings.
            crease = go.Scatter(x=x_goal, y=y_goal, fill='toself', fillcolor='rgba(173, 216, 230, 0.3)', line=dict(color='red'), showlegend=False, hoverinfo='skip')
            return crease

        # Outer circle
        theta = np.linspace(0, 2*np.pi, 300)
        x_outer = 15 * np.cos(theta)
        y_outer = 15 * np.sin(theta)
        fig.add_trace(go.Scatter(x=x_outer, y=y_outer, mode='lines', line=dict(color='royalblue', width=2), showlegend=False, hoverinfo='skip'))
        # Inner circle
        theta2 = np.linspace(np.pi/2, 3*np.pi/2, 300)
        x_inner = 42.5 + 10 * np.cos(theta2)
        y_inner = 10 * np.sin(theta2)
        fig.add_trace(go.Scatter(x=x_inner, y=y_inner, mode='lines', line=dict(color='red', width=2), showlegend=False, hoverinfo='skip'))
        # Rink boundaries
        fig.add_shape(type='rect', xref='x', yref='y', x0=-42.5, y0=25, x1=42.5, y1=26, line=dict(color='royalblue', width=1), fillcolor='royalblue', opacity=1)
        fig.add_shape(type='rect', xref='x', yref='y', x0=-42.5, y0=-25, x1=42.5, y1=-26, line=dict(color='royalblue', width=1), fillcolor='royalblue', opacity=1)
        fig.add_shape(type='rect', xref='x', yref='y', x0=-42.5, y0=-0.5, x1=42.5, y1=0.5, line=dict(color='red', width=2), fillcolor='red')

        # Goal crease
        fig.add_trace(goal_crease())
        fig.add_trace(goal_crease(-1))
        # Goal lines
        goal_line_extreme = 42.5 - 28 + np.sqrt(28**2 - (28-11)**2)
        fig.add_shape(type='line', xref='x', yref='y', x0=-goal_line_extreme, y0=89, x1=goal_line_extreme, y1=89, line=dict(color='red', width=2))
        fig.add_shape(type='line', xref='x', yref='y', x0=-goal_line_extreme, y0=-89, x1=goal_line_extreme, y1=-89, line=dict(color='red', width=2))

        # Faceoff circles
        fig.add_traces(faceoff_circle(-22, 69))
        fig.add_traces(faceoff_circle(22, 69))
        fig.add_traces(faceoff_circle(-22, -69))
        fig.add_traces(faceoff_circle(22, -69))
        fig.add_traces(faceoff_circle(-22, -20, False))
        fig.add_traces(faceoff_circle(22, -20, False))
        fig.add_traces(faceoff_circle(-22, 20, False))
        fig.add_traces(faceoff_circle(22, 20, False))

        # Sidelines
        theta_lines = np.linspace(0, np.pi/2, 20)
        x_lines1 = np.concatenate(([-42.5], -42.5 + 28 - 28*np.cos(theta_lines), 42.5 - 28 + 28*np.cos(np.flip(theta_lines))))
        y_lines1 = np.concatenate(([15], 72 + 28*np.sin(theta_lines), 72 + 28*np.sin(np.flip(theta_lines))))
        x_lines2 = np.concatenate(([-42.5], -42.5 + 28 - 28*np.cos(theta_lines), 42.5 - 28 + 28*np.cos(np.flip(theta_lines))))
        y_lines2 = np.concatenate(([15], -72 - 28*np.sin(theta_lines), -72 - 28*np.sin(np.flip(theta_lines))))
        fig.add_trace(go.Scatter(x=x_lines1, y=y_lines1, mode='lines', line=dict(color='white', width=2), showlegend=False, hoverinfo='skip'))
        fig.add_trace(go.Scatter(x=x_lines2, y=y_lines2, mode='lines', line=dict(color='white', width=2), showlegend=False, hoverinfo='skip'))
        fig.add_shape(type='line', xref='x', yref='y', x0=42.5, y0=-72.5, x1=42.5, y1=72.5, line=dict(color='white', width=2))
        fig.add_shape(type='line', xref='x', yref='y', x0=-42.5, y0=-72.5, x1=-42.5, y1=72.5, line=dict(color='white', width=2))

        # Add goals
        goal_width = 6  # feet
        goal_depth = 4  # feet

        # Top goal
        fig.add_shape(
            type="rect",
            xref="x",
            yref="y",
            x0=-goal_width / 2,
            y0=89,
            x1=goal_width / 2,
            y1=89 + goal_depth,
            line=dict(color="red", width=2),
        )
        # Bottom goal
        fig.add_shape(
            type="rect",
            xref="x",
            yref="y",
            x0=-goal_width / 2,
            y0=-89 - goal_depth,
            x1=goal_width / 2,
            y1=-89,
            line=dict(color="red", width=2),
        )

    else :
        fig.update_layout(xaxis=dict(range=length_range, showgrid=False, zeroline=False, showticklabels=False), yaxis=dict(range=[-42.6, 42.6], showgrid=False, zeroline=False, showticklabels=False, constrain="domain"),
                          showlegend=True, autosize =True, template="plotly_white")
        fig.update_yaxes(
            scaleanchor="x",
            scaleratio=1,
        )

        def goal_crease(flip=1):
            # Crease outline in front of the goal line at x = 89 (flip=1) or x = -89 (flip=-1).
            y_seq = np.linspace(-4, 4, 100)
            y_goal = np.concatenate(([-4], y_seq, [4]))
            x_goal = flip * np.concatenate(([89], 83 + y_seq**2/4**2*1.5, [89]))
            crease = go.Scatter(x=x_goal, y=y_goal, fill='toself', fillcolor='rgba(173, 216, 230, 0.3)', line=dict(color='red'), showlegend=False, hoverinfo='skip')
            return crease

        # Outer circle
        theta = np.linspace(0, 2 * np.pi, 300)
        x_outer = 15 * np.sin(theta)
        y_outer = 15 * np.cos(theta)
        fig.add_trace(go.Scatter(x=x_outer, y=y_outer, mode='lines', line=dict(color='royalblue', width=2), showlegend=False, hoverinfo='skip'))
        # Inner circle (same half circle as the vertical rink, rotated onto the y = -42.5 board)
        theta2 = np.linspace(3 * np.pi / 2, np.pi / 2, 300)
        x_inner = 10 * np.sin(theta2)
        y_inner = -42.5 - 10 * np.cos(theta2)
        fig.add_trace(go.Scatter(x=x_inner, y=y_inner, mode='lines', line=dict(color='red', width=2), showlegend=False, hoverinfo='skip'))

        # Rink boundaries
        fig.add_shape(type='rect', xref='x', yref='y', x0=25, y0=-42.5, x1=26, y1=42.5, line=dict(color='royalblue', width=1), fillcolor='royalblue', opacity=1)
        fig.add_shape(type='rect', xref='x', yref='y', x0=-25, y0=-42.5, x1=-26, y1=42.5, line=dict(color='royalblue', width=1), fillcolor='royalblue', opacity=1)
        fig.add_shape(type='rect', xref='x', yref='y', x0=-0.5, y0=-42.5, x1=0.5, y1=42.5, line=dict(color='red', width=2), fillcolor='red')
        # Goal crease
        fig.add_trace(goal_crease())
        fig.add_trace(goal_crease(-1))
        # Goal lines
        goal_line_extreme = 42.5 - 28 + np.sqrt(28 ** 2 - (28 - 11) ** 2)
        fig.add_shape(type='line', xref='x', yref='y', x0=89, y0=-goal_line_extreme, x1=89, y1=goal_line_extreme, line=dict(color='red', width=2))
        fig.add_shape(type='line', xref='x', yref='y', x0=-89, y0=-goal_line_extreme, x1=-89, y1=goal_line_extreme, line=dict(color='red', width=2))
        # Faceoff circles
        fig.add_traces(faceoff_circle(-69, -22))
        fig.add_traces(faceoff_circle(-69, 22))
        fig.add_traces(faceoff_circle(69, -22))
        fig.add_traces(faceoff_circle(69, 22))
        fig.add_traces(faceoff_circle(-20, -22, False))
        fig.add_traces(faceoff_circle(-20, 22, False))
        fig.add_traces(faceoff_circle(20, -22, False))
        fig.add_traces(faceoff_circle(20, 22, False))

        # Sidelines
        theta_lines = np.linspace(0, np.pi / 2, 20)
        x_lines1 = np.concatenate(([15], 72 + 28 * np.sin(theta_lines), 72 + 28 * np.sin(np.flip(theta_lines))))
        y_lines1 = np.concatenate(([-42.5], -42.5 + 28 - 28 * np.cos(theta_lines), 42.5 - 28 + 28 * np.cos(np.flip(theta_lines))))
        x_lines2 = np.concatenate(([15], -72 - 28 * np.sin(theta_lines), -72 - 28 * np.sin(np.flip(theta_lines))))
        y_lines2 = np.concatenate(([-42.5], -42.5 + 28 - 28 * np.cos(theta_lines), 42.5 - 28 + 28 * np.cos(np.flip(theta_lines))))
        fig.add_trace(go.Scatter(x=x_lines1, y=y_lines1, mode='lines', line=dict(color='white', width=2), showlegend=False, hoverinfo='skip'))
        fig.add_trace(go.Scatter(x=x_lines2, y=y_lines2, mode='lines', line=dict(color='white', width=2), showlegend=False, hoverinfo='skip'))
        fig.add_shape(type='line', xref='x', yref='y', x0=-72.5, y0=-42.5, x1=72.5, y1=-42.5, line=dict(color='white', width=2))
        fig.add_shape(type='line', xref='x', yref='y', x0=-72.5, y0=42.5, x1=72.5, y1=42.5, line=dict(color='white', width=2))

        # Add goals
        goal_width = 6  # feet
        goal_depth = 4  # feet

        # Right goal
        fig.add_shape(
            type="rect",
            xref="x",
            yref="y",
            x0=89,
            y0=-goal_width / 2,
            x1=89 + goal_depth,
            y1=goal_width / 2,
            line=dict(color="red", width=2),
        )
        # Left goal
        fig.add_shape(
            type="rect",
            xref="x",
            yref="y",
            x0=-89 - goal_depth,
            y0=-goal_width / 2,
            x1=-89,
            y1=goal_width / 2,
            line=dict(color="red", width=2),
        )

    # Add logo
    # The image bytes are read fully into memory (with a timeout) so the HTTP
    # connection is released instead of being held open by a streamed raw body.
    logo_response = rs.get('https://weakside-breakout.s3.us-east-2.amazonaws.com/utils/wsba.png', timeout=30)
    logo = Image.open(io.BytesIO(logo_response.content))

    fig.add_layout_image(
        dict(
            source=logo,
            xref="x",
            yref="y",
            x=-12,
            y=12,
            sizex=24,
            sizey=24,
            sizing="stretch",
            opacity=1)
    )

    #Set background to transparent
    fig.update_layout(
        paper_bgcolor="rgba(0,0,0,0)",
        plot_bgcolor="rgba(0,0,0,0)"
    )
    return fig
|
@@ -26,8 +26,8 @@ def server(input, output, session):
|
|
26
26
|
#If no input data is provided automatically provide a select skater and plot all 5v5 fenwick shots
|
27
27
|
#If no input data is provided automatically provide a select skater and plot all 5v5 fenwick shots
|
28
28
|
defaults = {
|
29
|
-
'
|
30
|
-
'
|
29
|
+
'seasons':['20242025,20242025'],
|
30
|
+
'teams':['EDM,FLA'],
|
31
31
|
'strength_state':['5v5'],
|
32
32
|
'season_type':['2']
|
33
33
|
}
|
wsba_hockey/tools/agg.py
CHANGED
@@ -30,6 +30,7 @@ def calc_indv(pbp,game_strength,second_group):
|
|
30
30
|
ep1 = (
|
31
31
|
pbp.loc[pbp['event_type'].isin(["goal", "shot-on-goal", "missed-shot","blocked-shot",'hit','giveaway','takeaway','faceoff','penalty'])].groupby(raw_group_1).agg(
|
32
32
|
Gi=('event_type', lambda x: (x == "goal").sum()),
|
33
|
+
Si=('event_type', lambda x: (x.isin(['shot-on-goal','goal'])).sum()),
|
33
34
|
Fi=('event_type', lambda x: (x.isin(fenwick_events)).sum()),
|
34
35
|
Ci=('event_type', lambda x: (x.isin(fenwick_events+['blocked-shot'])).sum()),
|
35
36
|
xGi=('xG', 'sum'),
|
@@ -80,6 +81,7 @@ def calc_indv(pbp,game_strength,second_group):
|
|
80
81
|
shot = (
|
81
82
|
pbp.loc[(pbp['event_type'].isin(["goal", "shot-on-goal", "missed-shot"])&(pbp['shot_type']==type))].groupby(raw_group_1).agg(
|
82
83
|
Gi=('event_type', lambda x: (x == "goal").sum()),
|
84
|
+
Si=('event_type', lambda x: (x.isin(['shot-on-goal','goal'])).sum()),
|
83
85
|
Fi=('event_type', lambda x: (x != "blocked-shot").sum()),
|
84
86
|
xGi=('xG', 'sum'),
|
85
87
|
).reset_index().rename(columns={'event_player_1_id': 'ID', 'event_team_abbr': 'Team', 'season': 'Season', 'game_id':'Game'})
|
@@ -96,6 +98,7 @@ def calc_indv(pbp,game_strength,second_group):
|
|
96
98
|
|
97
99
|
indv['P1'] = indv['Gi']+indv['A1']
|
98
100
|
indv['P'] = indv['P1']+indv['A2']
|
101
|
+
indv['Shi%'] = indv['Gi']/indv['Si']
|
99
102
|
indv['xGi/Fi'] = indv['xGi']/indv['Fi']
|
100
103
|
indv['Gi/xGi'] = indv['Gi']/indv['xGi']
|
101
104
|
indv['Fshi%'] = indv['Gi']/indv['Fi']
|
@@ -134,6 +137,8 @@ def calc_onice(pbp,game_strength,second_group):
|
|
134
137
|
df['xGA'] = np.where(df['event_team_abbr'] == df[opp_col], df['xG'], 0)
|
135
138
|
df['GF'] = np.where((df['event_type'] == "goal") & (df['event_team_abbr'] == df[team_col]), 1, 0)
|
136
139
|
df['GA'] = np.where((df['event_type'] == "goal") & (df['event_team_abbr'] == df[opp_col]), 1, 0)
|
140
|
+
df['SF'] = np.where((df['event_type'].isin(['shot-on-goal','goal'])) & (df['event_team_abbr'] == df[team_col]), 1, 0)
|
141
|
+
df['SA'] = np.where((df['event_type'].isin(['shot-on-goal','goal'])) & (df['event_team_abbr'] == df[opp_col]), 1, 0)
|
137
142
|
df['FF'] = np.where((df['event_type'].isin(fenwick_events)) & (df['event_team_abbr'] == df[team_col]), 1, 0)
|
138
143
|
df['FA'] = np.where((df['event_type'].isin(fenwick_events)) & (df['event_team_abbr'] == df[opp_col]), 1, 0)
|
139
144
|
df['CF'] = np.where((df['event_type'].isin(fenwick_events+['blocked-shot'])) & (df['event_team_abbr'] == df[team_col]), 1, 0)
|
@@ -149,6 +154,8 @@ def calc_onice(pbp,game_strength,second_group):
|
|
149
154
|
FA=('FA', 'sum'),
|
150
155
|
GF=('GF', 'sum'),
|
151
156
|
GA=('GA', 'sum'),
|
157
|
+
SF=('SF', 'sum'),
|
158
|
+
SA=('SA', 'sum'),
|
152
159
|
xGF=('xGF', 'sum'),
|
153
160
|
xGA=('xGA', 'sum'),
|
154
161
|
CF=('CF','sum'),
|
@@ -170,6 +177,8 @@ def calc_onice(pbp,game_strength,second_group):
|
|
170
177
|
FA=('FA', 'sum'),
|
171
178
|
GF=('GF', 'sum'),
|
172
179
|
GA=('GA', 'sum'),
|
180
|
+
SF=('SF', 'sum'),
|
181
|
+
SA=('SA', 'sum'),
|
173
182
|
xGF=('xGF', 'sum'),
|
174
183
|
xGA=('xGA', 'sum'),
|
175
184
|
CF=('CF','sum'),
|
@@ -179,15 +188,18 @@ def calc_onice(pbp,game_strength,second_group):
|
|
179
188
|
DZF=('DZF','sum')
|
180
189
|
).reset_index()
|
181
190
|
|
191
|
+
onice_stats['ShF%'] = onice_stats['GF']/onice_stats['SF']
|
182
192
|
onice_stats['xGF/FF'] = onice_stats['xGF']/onice_stats['FF']
|
183
193
|
onice_stats['GF/xGF'] = onice_stats['GF']/onice_stats['xGF']
|
184
194
|
onice_stats['FshF%'] = onice_stats['GF']/onice_stats['FF']
|
195
|
+
onice_stats['ShA%'] = onice_stats['GA']/onice_stats['SA']
|
185
196
|
onice_stats['xGA/FA'] = onice_stats['xGA']/onice_stats['FA']
|
186
197
|
onice_stats['GA/xGA'] = onice_stats['GA']/onice_stats['xGA']
|
187
198
|
onice_stats['FshA%'] = onice_stats['GA']/onice_stats['FA']
|
188
199
|
onice_stats['OZF%'] = onice_stats['OZF']/(onice_stats['OZF']+onice_stats['NZF']+onice_stats['DZF'])
|
189
200
|
onice_stats['NZF%'] = onice_stats['NZF']/(onice_stats['OZF']+onice_stats['NZF']+onice_stats['DZF'])
|
190
201
|
onice_stats['DZF%'] = onice_stats['DZF']/(onice_stats['OZF']+onice_stats['NZF']+onice_stats['DZF'])
|
202
|
+
onice_stats['GSAx'] = onice_stats['xGA']-onice_stats['GA']
|
191
203
|
|
192
204
|
return onice_stats
|
193
205
|
|
@@ -206,6 +218,8 @@ def calc_team(pbp,game_strength,second_group):
|
|
206
218
|
pbp['xGA'] = np.where(pbp['event_team_abbr'] == pbp[f'{team[1]}_team_abbr'], pbp['xG'], 0)
|
207
219
|
pbp['GF'] = np.where((pbp['event_type'] == "goal") & (pbp['event_team_abbr'] == pbp[f'{team[0]}_team_abbr']), 1, 0)
|
208
220
|
pbp['GA'] = np.where((pbp['event_type'] == "goal") & (pbp['event_team_abbr'] == pbp[f'{team[1]}_team_abbr']), 1, 0)
|
221
|
+
pbp['SF'] = np.where((pbp['event_type'].isin(['shot-on-goal','goal'])) & (pbp['event_team_abbr'] == pbp[f'{team[0]}_team_abbr']), 1, 0)
|
222
|
+
pbp['SA'] = np.where((pbp['event_type'].isin(['shot-on-goal','goal'])) & (pbp['event_team_abbr'] == pbp[f'{team[1]}_team_abbr']), 1, 0)
|
209
223
|
pbp['FF'] = np.where((pbp['event_type'].isin(fenwick_events)) & (pbp['event_team_abbr'] == pbp[f'{team[0]}_team_abbr']), 1, 0)
|
210
224
|
pbp['FA'] = np.where((pbp['event_type'].isin(fenwick_events)) & (pbp['event_team_abbr'] == pbp[f'{team[1]}_team_abbr']), 1, 0)
|
211
225
|
pbp['CF'] = np.where((pbp['event_type'].isin(fenwick_events+['blocked-shot'])) & (pbp['event_team_abbr'] == pbp[f'{team[0]}_team_abbr']), 1, 0)
|
@@ -234,6 +248,8 @@ def calc_team(pbp,game_strength,second_group):
|
|
234
248
|
FA=('FA', 'sum'),
|
235
249
|
GF=('GF', 'sum'),
|
236
250
|
GA=('GA', 'sum'),
|
251
|
+
SF=('SF','sum'),
|
252
|
+
SA=('SA','sum'),
|
237
253
|
xGF=('xGF', 'sum'),
|
238
254
|
xGA=('xGA', 'sum'),
|
239
255
|
CF=('CF','sum'),
|
@@ -264,6 +280,8 @@ def calc_team(pbp,game_strength,second_group):
|
|
264
280
|
FA=('FA', 'sum'),
|
265
281
|
GF=('GF', 'sum'),
|
266
282
|
GA=('GA', 'sum'),
|
283
|
+
SF=('SF','sum'),
|
284
|
+
SA=('SA','sum'),
|
267
285
|
xGF=('xGF', 'sum'),
|
268
286
|
xGA=('xGA', 'sum'),
|
269
287
|
CF=('CF','sum'),
|
@@ -286,15 +304,18 @@ def calc_team(pbp,game_strength,second_group):
|
|
286
304
|
RushAG=('RushAG','sum'),
|
287
305
|
).reset_index()
|
288
306
|
|
307
|
+
onice_stats['ShF%'] = onice_stats['GF']/onice_stats['SF']
|
289
308
|
onice_stats['xGF/FF'] = onice_stats['xGF']/onice_stats['FF']
|
290
309
|
onice_stats['GF/xGF'] = onice_stats['GF']/onice_stats['xGF']
|
291
310
|
onice_stats['FshF%'] = onice_stats['GF']/onice_stats['FF']
|
311
|
+
onice_stats['ShA%'] = onice_stats['GA']/onice_stats['SA']
|
292
312
|
onice_stats['xGA/FA'] = onice_stats['xGA']/onice_stats['FA']
|
293
313
|
onice_stats['GA/xGA'] = onice_stats['GA']/onice_stats['xGA']
|
294
314
|
onice_stats['FshA%'] = onice_stats['GA']/onice_stats['FA']
|
295
315
|
onice_stats['PM%'] = onice_stats['Take']/(onice_stats['Give']+onice_stats['Take'])
|
296
316
|
onice_stats['HF%'] = onice_stats['HF']/(onice_stats['HF']+onice_stats['HA'])
|
297
317
|
onice_stats['PENL%'] = onice_stats['Draw']/(onice_stats['Draw']+onice_stats['Penl'])
|
318
|
+
onice_stats['GSAx'] = onice_stats['xGA']/onice_stats['GA']
|
298
319
|
|
299
320
|
return onice_stats
|
300
321
|
|
@@ -313,6 +334,8 @@ def calc_goalie(pbp,game_strength,second_group):
|
|
313
334
|
pbp['xGA'] = np.where(pbp['event_team_abbr'] == pbp[f'{team[1]}_team_abbr'], pbp['xG'], 0)
|
314
335
|
pbp['GF'] = np.where((pbp['event_type'] == "goal") & (pbp['event_team_abbr'] == pbp[f'{team[0]}_team_abbr']), 1, 0)
|
315
336
|
pbp['GA'] = np.where((pbp['event_type'] == "goal") & (pbp['event_team_abbr'] == pbp[f'{team[1]}_team_abbr']), 1, 0)
|
337
|
+
pbp['SF'] = np.where((pbp['event_type'].isin(['shot-on-goal','goal'])) & (pbp['event_team_abbr'] == pbp[f'{team[0]}_team_abbr']), 1, 0)
|
338
|
+
pbp['SA'] = np.where((pbp['event_type'].isin(['shot-on-goal','goal'])) & (pbp['event_team_abbr'] == pbp[f'{team[1]}_team_abbr']), 1, 0)
|
316
339
|
pbp['FF'] = np.where((pbp['event_type'].isin(fenwick_events)) & (pbp['event_team_abbr'] == pbp[f'{team[0]}_team_abbr']), 1, 0)
|
317
340
|
pbp['FA'] = np.where((pbp['event_type'].isin(fenwick_events)) & (pbp['event_team_abbr'] == pbp[f'{team[1]}_team_abbr']), 1, 0)
|
318
341
|
pbp['CF'] = np.where((pbp['event_type'].isin(fenwick_events+['blocked-shot'])) & (pbp['event_team_abbr'] == pbp[f'{team[0]}_team_abbr']), 1, 0)
|
@@ -331,6 +354,8 @@ def calc_goalie(pbp,game_strength,second_group):
|
|
331
354
|
FA=('FA', 'sum'),
|
332
355
|
GF=('GF', 'sum'),
|
333
356
|
GA=('GA', 'sum'),
|
357
|
+
SF=('SF', 'sum'),
|
358
|
+
SA=('SA', 'sum'),
|
334
359
|
xGF=('xGF', 'sum'),
|
335
360
|
xGA=('xGA', 'sum'),
|
336
361
|
CF=('CF','sum'),
|
@@ -351,6 +376,8 @@ def calc_goalie(pbp,game_strength,second_group):
|
|
351
376
|
FA=('FA', 'sum'),
|
352
377
|
GF=('GF', 'sum'),
|
353
378
|
GA=('GA', 'sum'),
|
379
|
+
SF=('SF', 'sum'),
|
380
|
+
SA=('SA', 'sum'),
|
354
381
|
xGF=('xGF', 'sum'),
|
355
382
|
xGA=('xGA', 'sum'),
|
356
383
|
CF=('CF','sum'),
|
@@ -363,9 +390,11 @@ def calc_goalie(pbp,game_strength,second_group):
|
|
363
390
|
RushAG=('RushAG','sum'),
|
364
391
|
).reset_index()
|
365
392
|
|
393
|
+
onice_stats['ShF%'] = onice_stats['GF']/onice_stats['SF']
|
366
394
|
onice_stats['xGF/FF'] = onice_stats['xGF']/onice_stats['FF']
|
367
395
|
onice_stats['GF/xGF'] = onice_stats['GF']/onice_stats['xGF']
|
368
396
|
onice_stats['FshF%'] = onice_stats['GF']/onice_stats['FF']
|
397
|
+
onice_stats['ShA%'] = onice_stats['GA']/onice_stats['SA']
|
369
398
|
onice_stats['xGA/FA'] = onice_stats['xGA']/onice_stats['FA']
|
370
399
|
onice_stats['GA/xGA'] = onice_stats['GA']/onice_stats['xGA']
|
371
400
|
onice_stats['FshA%'] = onice_stats['GA']/onice_stats['FA']
|
wsba_hockey/tools/scraping.py
CHANGED
@@ -533,6 +533,171 @@ def parse_html(info):
|
|
533
533
|
#Return: parsed HTML pbp
|
534
534
|
return data
|
535
535
|
|
536
|
+
### ESPN SCRAPING FUNCTIONS ###
|
537
|
+
def espn_game_id(date,away,home):
    #Given a date formatted as YYYY-MM-DD and teams, return game id from ESPN schedule
    #Raises IndexError if no game between the supplied teams is listed on that date
    date = date.replace("-","")

    #Retrieve data (timeout matches the one used for the play-by-play request in parse_espn)
    api = f"https://site.api.espn.com/apis/site/v2/sports/hockey/nhl/scoreboard?dates={date}"
    schedule = pd.json_normalize(rs.get(api, timeout = 500).json()['events'])

    #Create team abbreviation columns
    #shortName is sliced from both ends and stripped, so two-letter codes lose their padding space
    schedule['away_team_abbr'] = schedule['shortName'].str[:3].str.strip(" ")
    schedule['home_team_abbr'] = schedule['shortName'].str[-3:].str.strip(" ")

    #Modify team abbreviations as necessary
    #Only the abbreviation columns are remapped so unrelated schedule columns are never rewritten
    abbr_cols = ['away_team_abbr','home_team_abbr']
    schedule[abbr_cols] = schedule[abbr_cols].replace({
        "LA":"LAK",
        "NJ":"NJD",
        "SJ":"SJS",
        "TB":"TBL",
    })

    #Retrieve game id
    matches = schedule.loc[(schedule['away_team_abbr']==away)&
                           (schedule['home_team_abbr']==home),'id'].tolist()

    #Same exception type as indexing an empty list, but with a message that identifies the game
    if not matches:
        raise IndexError(f"No ESPN game found for {away} at {home} on {date}")

    #Return: ESPN game id
    return matches[0]
|
563
|
+
|
564
|
+
def parse_espn(date,away,home):
    #Given a date formatted as YYYY-MM-DD and teams, return game events
    #The result holds one row per ESPN event that has coordinates, with period/clock info,
    #standardized event names, adjusted coordinates, and running score/fenwick counts
    game_id = espn_game_id(date,away,home)
    url = f'https://www.espn.com/nhl/playbyplay/_/gameId/{game_id}'

    #Code modified from Patrick Bacon

    #Retrieve game events as json
    page = rs.get(url, headers={'User-Agent': 'Mozilla/5.0'}, timeout = 500)
    soup = BeautifulSoup(page.content.decode('ISO-8859-1'), 'lxml', multi_valued_attributes = None)
    #Serialize the page once; both embedded json payloads are cut out of this string
    page_text = str(soup)
    play_groups = json_lib.loads(page_text.split('"playGrps":')[1].split(',"tms"')[0])

    #DataFrame of time-related info for events
    #Built with a single public pd.concat instead of repeated private DataFrame._append calls
    period_frames = [pd.DataFrame(group) for group in play_groups]
    clock_df = pd.concat(period_frames) if period_frames else pd.DataFrame()

    clock_df = clock_df[~pd.isna(clock_df.clock)]

    # Needed to add .split(',"st":3')[0] for playoffs

    #DataFrame of coordinates for events
    coords_df = pd.DataFrame(json_lib.loads(page_text.split('plays":')[1].split(',"st":1')[0].split(',"st":2')[0].split(',"st":3')[0]))

    clock_df = clock_df.assign(
        clock = clock_df.clock.apply(lambda x: x['displayValue'])
    )

    coords_df = coords_df.assign(
        coords_x = coords_df[~pd.isna(coords_df.coordinate)].coordinate.apply(lambda x: x['x']).astype(int),
        coords_y = coords_df[~pd.isna(coords_df.coordinate)].coordinate.apply(lambda y: y['y']).astype(int),
    )

    #Combine
    espn_events = coords_df.merge(clock_df.loc[:, ['id', 'clock']])

    espn_events = espn_events.assign(
        period = espn_events['period'].apply(lambda x: x['number']),
        minutes = espn_events['clock'].str.split(':').apply(lambda x: x[0]).astype(int),
        seconds = espn_events['clock'].str.split(':').apply(lambda x: x[1]).astype(int),
        event_type = espn_events['type'].apply(lambda x: x['txt'])
    )

    #Faceoffs missing both coordinates are placed at (0, 0)
    espn_events = espn_events.assign(coords_x = np.where((pd.isna(espn_events.coords_x)) & (pd.isna(espn_events.coords_y)) &
                (espn_events.event_type=='Face Off'), 0, espn_events.coords_x
    ),
                        coords_y = np.where((pd.isna(espn_events.coords_x)) & (pd.isna(espn_events.coords_y)) &
                (espn_events.event_type=='Face Off'), 0, espn_events.coords_y))

    #Events still lacking a coordinate are dropped
    espn_events = espn_events[(~pd.isna(espn_events.coords_x)) & (~pd.isna(espn_events.coords_y))]

    espn_events = espn_events.assign(
        coords_x = espn_events.coords_x.astype(int),
        coords_y = espn_events.coords_y.astype(int)
    )

    #Rename events
    #The turnover event includes just one player in the event information, meaning takeaways will have no coordinates for play-by-plays created by ESPN scraping
    espn_events['event_type'] = espn_events['event_type'].replace({
        "Face Off":'faceoff',
        "Hit":'hit',
        "Shot":'shot-on-goal',
        "Missed":'missed-shot',
        "Blocked":'blocked-shot',
        "Goal":'goal',
        "Delayed Penalty":'delayed-penalty',
        "Penalty":'penalty',
    })

    #Period time adjustments (only 'seconds_elapsed' is included in the resulting data)
    espn_events['period_time_simple'] = espn_events['clock'].str.replace(":","",regex=True)
    espn_events['period_seconds_elapsed'] = np.where(espn_events['period_time_simple'].str.len()==3,
                                        ((espn_events['period_time_simple'].str[0].astype(int)*60)+espn_events['period_time_simple'].str[-2:].astype(int)),
                                        ((espn_events['period_time_simple'].str[0:2].astype(int)*60)+espn_events['period_time_simple'].str[-2:].astype(int)))
    espn_events['seconds_elapsed'] = ((espn_events['period']-1)*1200)+espn_events['period_seconds_elapsed']

    espn_events = espn_events.rename(columns = {'text':'description'})

    #Add event team
    espn_events['event_team_abbr'] = espn_events['homeAway'].replace({
        "away":away,
        "home":home
    })

    #Some games (mostly preseason and all star games) do not include coordinates.
    try:
        espn_events['x_fixed'] = abs(espn_events['coords_x'])
        espn_events['y_fixed'] = np.where(espn_events['coords_x']<0,-espn_events['coords_y'],espn_events['coords_y'])
        espn_events['x_adj'] = np.where(espn_events['homeAway']=="home",espn_events['x_fixed'],-espn_events['x_fixed'])
        espn_events['y_adj'] = np.where(espn_events['homeAway']=="home",espn_events['y_fixed'],-espn_events['y_fixed'])
        espn_events['event_distance'] = np.sqrt(((89 - espn_events['x_fixed'])**2) + (espn_events['y_fixed']**2))
        espn_events['event_angle'] = np.degrees(np.arctan2(abs(espn_events['y_fixed']), abs(89 - espn_events['x_fixed'])))
    except TypeError:
        print("No coordinates found for ESPN game...")

        espn_events['x_fixed'] = np.nan
        espn_events['y_fixed'] = np.nan
        espn_events['x_adj'] = np.nan
        espn_events['y_adj'] = np.nan
        espn_events['event_distance'] = np.nan
        espn_events['event_angle'] = np.nan

    #Assign score and fenwick for each event
    #Running totals include the current event; any event not marked "home" counts for the away team
    fenwick_events = ['missed-shot','shot-on-goal','goal']
    is_fenwick = espn_events['event_type'].isin(fenwick_events)
    is_goal = espn_events['event_type'] == 'goal'
    is_home = espn_events['homeAway'] == "home"

    espn_events['away_score'] = (is_goal & ~is_home).cumsum()
    espn_events['home_score'] = (is_goal & is_home).cumsum()
    espn_events['away_fenwick'] = (is_fenwick & ~is_home).cumsum()
    espn_events['home_fenwick'] = (is_fenwick & is_home).cumsum()
    #Return: play-by-play events in supplied game from ESPN
    return espn_events
|
700
|
+
|
536
701
|
def assign_target(data):
|
537
702
|
#Assign target number to plays to assist with merging
|
538
703
|
|
@@ -554,12 +719,11 @@ def combine_pbp(info,sources):
|
|
554
719
|
#Route data combining - json if season is after 2009-2010:
|
555
720
|
if str(info['season']) in ['20052006','20062007','20072008','20082009','20092010']:
|
556
721
|
#ESPN x HTML
|
557
|
-
|
558
|
-
|
722
|
+
espn_pbp = parse_espn(str(info['game_date']),info['away_team_abbr'],info['home_team_abbr']).rename(columns={'coords_x':'x',"coords_y":'y'}).sort_values(['period','seconds_elapsed']).reset_index()
|
723
|
+
merge_col = ['period','seconds_elapsed','event_type','event_team_abbr']
|
559
724
|
|
560
725
|
#Merge pbp
|
561
|
-
|
562
|
-
print('In-repair, please try again later...')
|
726
|
+
df = pd.merge(html_pbp,espn_pbp,how='left',on=merge_col)
|
563
727
|
|
564
728
|
else:
|
565
729
|
#JSON x HTML
|
wsba_hockey/workspace.py
CHANGED
@@ -6,7 +6,9 @@ import numpy as np
|
|
6
6
|
|
7
7
|
season_load = wsba.repo_load_seasons()
|
8
8
|
|
9
|
-
select = season_load[
|
9
|
+
select = season_load[0:3]
|
10
|
+
|
11
|
+
data.pbp(select)
|
10
12
|
|
11
13
|
#pbp = data.load_pbp_db(select)
|
12
14
|
|
@@ -26,26 +28,4 @@ select = season_load[9:17]
|
|
26
28
|
#data.fix_names(['skater','goalie'],select)
|
27
29
|
|
28
30
|
## DATA EXPORT ##
|
29
|
-
#data.push_to_sheet(select,['skaters','team','info'])
|
30
|
-
|
31
|
-
wsba.nhl_scrape_game(['2024020008'],remove=[]).to_csv('wtfwhy.csv',index=False)
|
32
|
-
|
33
|
-
pbp = pd.read_parquet('pbp/parquet/nhl_pbp_20242025.parquet')
|
34
|
-
helle = pbp.loc[pbp['event_goalie_id']==8476945,
|
35
|
-
['game_id','period','seconds_elapsed',
|
36
|
-
'strength_state','event_type','description',
|
37
|
-
'event_goalie_id','x','y','xG']]
|
38
|
-
mp = pd.read_csv('shots_2024.csv')
|
39
|
-
goalie = mp.loc[mp['goalieIdForShot']==8476945,
|
40
|
-
['game_id','period','time','event','goalieIdForShot',
|
41
|
-
'xCord','yCord','xGoal']].replace({
|
42
|
-
'SHOT':'shot-on-goal',
|
43
|
-
'MISS':'missed-shot',
|
44
|
-
'GOAL':'goal'
|
45
|
-
})
|
46
|
-
|
47
|
-
helle.to_csv('hellebuyck.csv',index=False)
|
48
|
-
helle['game_id'] = helle['game_id'].astype(str)
|
49
|
-
goalie['game_id'] = ('20240'+goalie['game_id'].astype(str))
|
50
|
-
pd.merge(helle,goalie,how='left',left_on=['game_id','period','seconds_elapsed','event_type','x','y'],right_on=['game_id','period','time','event','xCord','yCord']).to_csv('test.csv',index=False)
|
51
|
-
|
31
|
+
#data.push_to_sheet(select,['skaters','team','info'])
|