wsba-hockey 1.1.3__py3-none-any.whl → 1.1.6__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- wsba_hockey/api/api/index.py +68 -28
- wsba_hockey/data_pipelines.py +3 -2
- wsba_hockey/evidence/weakside-breakout/wsba_nhl_apps/wsba_nhl_apps/duos/app.py +210 -0
- wsba_hockey/evidence/weakside-breakout/wsba_nhl_apps/wsba_nhl_apps/duos/calc.py +163 -0
- wsba_hockey/evidence/weakside-breakout/wsba_nhl_apps/wsba_nhl_apps/line-combos/app.py +245 -0
- wsba_hockey/evidence/weakside-breakout/wsba_nhl_apps/wsba_nhl_apps/line-combos/plot.py +275 -0
- wsba_hockey/evidence/weakside-breakout/wsba_nhl_apps/wsba_nhl_apps/line-combos/rink_plot.py +245 -0
- wsba_hockey/evidence/weakside-breakout/wsba_nhl_apps/wsba_nhl_apps/matchups/app.py +2 -2
- wsba_hockey/evidence/weakside-breakout/wsba_nhl_apps/wsba_nhl_apps/pbp/app.py +1 -0
- wsba_hockey/tools/agg.py +30 -0
- wsba_hockey/tools/scraping.py +168 -4
- wsba_hockey/workspace.py +4 -24
- wsba_hockey/wsba_main.py +52 -51
- {wsba_hockey-1.1.3.dist-info → wsba_hockey-1.1.6.dist-info}/METADATA +9 -5
- {wsba_hockey-1.1.3.dist-info → wsba_hockey-1.1.6.dist-info}/RECORD +18 -13
- {wsba_hockey-1.1.3.dist-info → wsba_hockey-1.1.6.dist-info}/WHEEL +0 -0
- {wsba_hockey-1.1.3.dist-info → wsba_hockey-1.1.6.dist-info}/licenses/LICENSE +0 -0
- {wsba_hockey-1.1.3.dist-info → wsba_hockey-1.1.6.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,245 @@
|
|
1
|
+
|
2
|
+
import numpy as np
|
3
|
+
import plotly.graph_objects as go
|
4
|
+
import io
|
5
|
+
import base64
|
6
|
+
import requests as rs
|
7
|
+
from PIL import Image
|
8
|
+
|
9
|
+
def rink(setting = "full", vertical = False):
    '''
    Function to plot a hockey rink in Plotly and return the figure.

    Arguments:

    setting : range shown along the length of the ice. "full" (default) for full ice,
              "offense" for the positive half, "ozone" for the positive quarter,
              "defense" for the negative half, "dzone" for the negative quarter and
              "neutral" for the neutral zone. Any other value raises a KeyError.
    vertical : True for a vertical rink (length of the ice on the y axis),
               False (default) for a horizontal rink (length of the ice on the x axis).

    Returns:

    plotly.graph_objects.Figure holding the rink markings, the WSBA logo centered on
    (0, 0) and transparent paper/plot backgrounds.

    The logo is downloaded on every call, so network access is required.
    '''

    # Range displayed along the length of the ice for each setting. Both orientations
    # share one table so that every documented setting works in either of them.
    setting_dict = {
        "full" : [-101, 101],
        "offense" : [0, 101],
        "ozone" : [25, 101],
        "defense" : [-101, 0],
        "dzone" : [-101, -25],
        "neutral" : [-25, 25]
    }
    length_range = setting_dict[setting]

    # Axis cosmetics common to both axes in both orientations.
    axis_style = dict(showgrid=False, zeroline=False, showticklabels=False)

    def faceoff_circle(x, y, outer=True):
        # Traces for one faceoff spot centered on (x, y): the filled dot of radius 1,
        # preceded by the circle of radius 15 when outer is True.
        segments = []
        theta = np.linspace(0, 2*np.pi, 300)
        if outer:
            segments.append(go.Scatter(x=x + 15*np.cos(theta), y=y + 15*np.sin(theta), mode='lines', line=dict(width=2, color='red'), showlegend=False, hoverinfo='skip'))

        segments.append(go.Scatter(x=x + np.cos(theta), y=y + np.sin(theta), mode='lines', fill='toself', fillcolor='rgba(255, 0, 0, 0.43)', line=dict(color='rgba(255, 0, 0, 1)', width=2), showlegend=False, hoverinfo='skip'))

        return segments

    def goal_crease(flip=1):
        # Filled crease outline in front of the goal line at 89; flip=-1 mirrors it to
        # the goal line at -89.
        width_seq = np.linspace(-4, 4, 100)
        across = np.concatenate(([-4], width_seq, [4]))
        along = flip * np.concatenate(([89], 83 + width_seq**2/4**2*1.5, [89]))
        x_goal, y_goal = (across, along) if vertical else (along, across)
        # Legend entry and hover are disabled in both orientations, like every other
        # rink trace.
        return go.Scatter(x=x_goal, y=y_goal, fill='toself', fillcolor='rgba(173, 216, 230, 0.3)', line=dict(color='red'), showlegend=False, hoverinfo='skip')

    # Geometry shared by both orientations.
    theta = np.linspace(0, 2*np.pi, 300)
    theta_lines = np.linspace(0, np.pi/2, 20)
    goal_line_extreme = 42.5 - 28 + np.sqrt(28**2 - (28-11)**2)
    goal_width = 6  # feet
    goal_depth = 4  # feet
    # Rounded corner outline: coordinate across the width of the ice, and coordinate
    # along the length of the ice for the positive and the negative end.
    corner_across = np.concatenate(([-42.5], -42.5 + 28 - 28*np.cos(theta_lines), 42.5 - 28 + 28*np.cos(np.flip(theta_lines))))
    corner_along_pos = np.concatenate(([15], 72 + 28*np.sin(theta_lines), 72 + 28*np.sin(np.flip(theta_lines))))
    corner_along_neg = np.concatenate(([15], -72 - 28*np.sin(theta_lines), -72 - 28*np.sin(np.flip(theta_lines))))

    fig = go.Figure()

    if vertical:
        fig.update_layout(
            xaxis=dict(range=[-42.6, 42.6], constrain="domain", **axis_style),
            yaxis=dict(range=length_range, constrain="domain", **axis_style),
            showlegend=False, autosize=True, template="plotly_white")
        fig.update_yaxes(scaleanchor="x", scaleratio=1)

        # Center circle
        fig.add_trace(go.Scatter(x=15 * np.cos(theta), y=15 * np.sin(theta), mode='lines', line=dict(color='royalblue', width=2), showlegend=False, hoverinfo='skip'))
        # Half circle of radius 10 against the side at x = 42.5
        theta2 = np.linspace(np.pi/2, 3*np.pi/2, 300)
        fig.add_trace(go.Scatter(x=42.5 + 10 * np.cos(theta2), y=10 * np.sin(theta2), mode='lines', line=dict(color='red', width=2), showlegend=False, hoverinfo='skip'))

        # Blue lines and center red line
        fig.add_shape(type='rect', xref='x', yref='y', x0=-42.5, y0=25, x1=42.5, y1=26, line=dict(color='royalblue', width=1), fillcolor='royalblue', opacity=1)
        fig.add_shape(type='rect', xref='x', yref='y', x0=-42.5, y0=-25, x1=42.5, y1=-26, line=dict(color='royalblue', width=1), fillcolor='royalblue', opacity=1)
        fig.add_shape(type='rect', xref='x', yref='y', x0=-42.5, y0=-0.5, x1=42.5, y1=0.5, line=dict(color='red', width=2), fillcolor='red')

        # Goal creases
        fig.add_trace(goal_crease())
        fig.add_trace(goal_crease(-1))

        # Goal lines
        fig.add_shape(type='line', xref='x', yref='y', x0=-goal_line_extreme, y0=89, x1=goal_line_extreme, y1=89, line=dict(color='red', width=2))
        fig.add_shape(type='line', xref='x', yref='y', x0=-goal_line_extreme, y0=-89, x1=goal_line_extreme, y1=-89, line=dict(color='red', width=2))

        # Faceoff circles (end zones), then the neutral zone dots
        for spot_x, spot_y in ((-22, 69), (22, 69), (-22, -69), (22, -69)):
            fig.add_traces(faceoff_circle(spot_x, spot_y))
        for spot_x, spot_y in ((-22, -20), (22, -20), (-22, 20), (22, 20)):
            fig.add_traces(faceoff_circle(spot_x, spot_y, False))

        # Sidelines
        fig.add_trace(go.Scatter(x=corner_across, y=corner_along_pos, mode='lines', line=dict(color='white', width=2), showlegend=False, hoverinfo='skip'))
        fig.add_trace(go.Scatter(x=corner_across, y=corner_along_neg, mode='lines', line=dict(color='white', width=2), showlegend=False, hoverinfo='skip'))
        fig.add_shape(type='line', xref='x', yref='y', x0=42.5, y0=-72.5, x1=42.5, y1=72.5, line=dict(color='white', width=2))
        fig.add_shape(type='line', xref='x', yref='y', x0=-42.5, y0=-72.5, x1=-42.5, y1=72.5, line=dict(color='white', width=2))

        # Top goal
        fig.add_shape(type="rect", xref="x", yref="y", x0=-goal_width / 2, y0=89, x1=goal_width / 2, y1=89 + goal_depth, line=dict(color="red", width=2))
        # Bottom goal
        fig.add_shape(type="rect", xref="x", yref="y", x0=-goal_width / 2, y0=-89 - goal_depth, x1=goal_width / 2, y1=-89, line=dict(color="red", width=2))

    else:
        fig.update_layout(
            xaxis=dict(range=length_range, **axis_style),
            yaxis=dict(range=[-42.6, 42.6], constrain="domain", **axis_style),
            showlegend=True, autosize=True, template="plotly_white")
        fig.update_yaxes(scaleanchor="x", scaleratio=1)

        # Center circle
        fig.add_trace(go.Scatter(x=15 * np.sin(theta), y=15 * np.cos(theta), mode='lines', line=dict(color='royalblue', width=2), showlegend=False, hoverinfo='skip'))
        # Half circle of radius 10 against the side at y = -42.5
        theta2 = np.linspace(3 * np.pi / 2, np.pi / 2, 300)
        fig.add_trace(go.Scatter(x=10 * np.sin(theta2), y=-42.5 - 10 * np.cos(theta2), mode='lines', line=dict(color='red', width=2), showlegend=False, hoverinfo='skip'))

        # Blue lines and center red line
        fig.add_shape(type='rect', xref='x', yref='y', x0=25, y0=-42.5, x1=26, y1=42.5, line=dict(color='royalblue', width=1), fillcolor='royalblue', opacity=1)
        fig.add_shape(type='rect', xref='x', yref='y', x0=-25, y0=-42.5, x1=-26, y1=42.5, line=dict(color='royalblue', width=1), fillcolor='royalblue', opacity=1)
        fig.add_shape(type='rect', xref='x', yref='y', x0=-0.5, y0=-42.5, x1=0.5, y1=42.5, line=dict(color='red', width=2), fillcolor='red')

        # Goal creases
        fig.add_trace(goal_crease())
        fig.add_trace(goal_crease(-1))

        # Goal lines
        fig.add_shape(type='line', xref='x', yref='y', x0=89, y0=-goal_line_extreme, x1=89, y1=goal_line_extreme, line=dict(color='red', width=2))
        fig.add_shape(type='line', xref='x', yref='y', x0=-89, y0=-goal_line_extreme, x1=-89, y1=goal_line_extreme, line=dict(color='red', width=2))

        # Faceoff circles (end zones), then the neutral zone dots
        for spot_x, spot_y in ((-69, -22), (-69, 22), (69, -22), (69, 22)):
            fig.add_traces(faceoff_circle(spot_x, spot_y))
        for spot_x, spot_y in ((-20, -22), (-20, 22), (20, -22), (20, 22)):
            fig.add_traces(faceoff_circle(spot_x, spot_y, False))

        # Sidelines
        fig.add_trace(go.Scatter(x=corner_along_pos, y=corner_across, mode='lines', line=dict(color='white', width=2), showlegend=False, hoverinfo='skip'))
        fig.add_trace(go.Scatter(x=corner_along_neg, y=corner_across, mode='lines', line=dict(color='white', width=2), showlegend=False, hoverinfo='skip'))
        fig.add_shape(type='line', xref='x', yref='y', x0=-72.5, y0=-42.5, x1=72.5, y1=-42.5, line=dict(color='white', width=2))
        fig.add_shape(type='line', xref='x', yref='y', x0=-72.5, y0=42.5, x1=72.5, y1=42.5, line=dict(color='white', width=2))

        # Right goal
        fig.add_shape(type="rect", xref="x", yref="y", x0=89, y0=-goal_width / 2, x1=89 + goal_depth, y1=goal_width / 2, line=dict(color="red", width=2))
        # Left goal
        fig.add_shape(type="rect", xref="x", yref="y", x0=-89 - goal_depth, y0=-goal_width / 2, x1=-89, y1=goal_width / 2, line=dict(color="red", width=2))

    # Add logo. The request is bounded by a timeout and checked for an HTTP error so a
    # failed download is reported as such instead of as an unreadable image; the body
    # is read through .content so any transfer encoding is decoded before PIL sees it.
    logo_response = rs.get('https://weakside-breakout.s3.us-east-2.amazonaws.com/utils/wsba.png', timeout=30)
    logo_response.raise_for_status()
    logo = Image.open(io.BytesIO(logo_response.content))

    fig.add_layout_image(
        dict(
            source=logo,
            xref="x",
            yref="y",
            x=-12,
            y=12,
            sizex=24,
            sizey=24,
            sizing="stretch",
            opacity=1)
    )

    #Set background to transparent
    fig.update_layout(
        paper_bgcolor="rgba(0,0,0,0)",
        plot_bgcolor="rgba(0,0,0,0)"
    )

    return fig
|
@@ -26,8 +26,8 @@ def server(input, output, session):
|
|
26
26
|
#If no input data is provided automatically provide a select skater and plot all 5v5 fenwick shots
|
27
27
|
#If no input data is provided automatically provide a select skater and plot all 5v5 fenwick shots
|
28
28
|
defaults = {
|
29
|
-
'
|
30
|
-
'
|
29
|
+
'seasons':['20242025,20242025'],
|
30
|
+
'teams':['EDM,FLA'],
|
31
31
|
'strength_state':['5v5'],
|
32
32
|
'season_type':['2']
|
33
33
|
}
|
wsba_hockey/tools/agg.py
CHANGED
@@ -30,6 +30,7 @@ def calc_indv(pbp,game_strength,second_group):
|
|
30
30
|
ep1 = (
|
31
31
|
pbp.loc[pbp['event_type'].isin(["goal", "shot-on-goal", "missed-shot","blocked-shot",'hit','giveaway','takeaway','faceoff','penalty'])].groupby(raw_group_1).agg(
|
32
32
|
Gi=('event_type', lambda x: (x == "goal").sum()),
|
33
|
+
Si=('event_type', lambda x: (x.isin(['shot-on-goal','goal'])).sum()),
|
33
34
|
Fi=('event_type', lambda x: (x.isin(fenwick_events)).sum()),
|
34
35
|
Ci=('event_type', lambda x: (x.isin(fenwick_events+['blocked-shot'])).sum()),
|
35
36
|
xGi=('xG', 'sum'),
|
@@ -80,6 +81,7 @@ def calc_indv(pbp,game_strength,second_group):
|
|
80
81
|
shot = (
|
81
82
|
pbp.loc[(pbp['event_type'].isin(["goal", "shot-on-goal", "missed-shot"])&(pbp['shot_type']==type))].groupby(raw_group_1).agg(
|
82
83
|
Gi=('event_type', lambda x: (x == "goal").sum()),
|
84
|
+
Si=('event_type', lambda x: (x.isin(['shot-on-goal','goal'])).sum()),
|
83
85
|
Fi=('event_type', lambda x: (x != "blocked-shot").sum()),
|
84
86
|
xGi=('xG', 'sum'),
|
85
87
|
).reset_index().rename(columns={'event_player_1_id': 'ID', 'event_team_abbr': 'Team', 'season': 'Season', 'game_id':'Game'})
|
@@ -87,6 +89,7 @@ def calc_indv(pbp,game_strength,second_group):
|
|
87
89
|
|
88
90
|
shot = shot.rename(columns={
|
89
91
|
'Gi':f'{type.capitalize()}Gi',
|
92
|
+
'Si':f'{type.capitalize()}Si',
|
90
93
|
'Fi':f'{type.capitalize()}Fi',
|
91
94
|
'xGi':f'{type.capitalize()}xGi',
|
92
95
|
})
|
@@ -96,6 +99,7 @@ def calc_indv(pbp,game_strength,second_group):
|
|
96
99
|
|
97
100
|
indv['P1'] = indv['Gi']+indv['A1']
|
98
101
|
indv['P'] = indv['P1']+indv['A2']
|
102
|
+
indv['Shi%'] = indv['Gi']/indv['Si']
|
99
103
|
indv['xGi/Fi'] = indv['xGi']/indv['Fi']
|
100
104
|
indv['Gi/xGi'] = indv['Gi']/indv['xGi']
|
101
105
|
indv['Fshi%'] = indv['Gi']/indv['Fi']
|
@@ -134,6 +138,8 @@ def calc_onice(pbp,game_strength,second_group):
|
|
134
138
|
df['xGA'] = np.where(df['event_team_abbr'] == df[opp_col], df['xG'], 0)
|
135
139
|
df['GF'] = np.where((df['event_type'] == "goal") & (df['event_team_abbr'] == df[team_col]), 1, 0)
|
136
140
|
df['GA'] = np.where((df['event_type'] == "goal") & (df['event_team_abbr'] == df[opp_col]), 1, 0)
|
141
|
+
df['SF'] = np.where((df['event_type'].isin(['shot-on-goal','goal'])) & (df['event_team_abbr'] == df[team_col]), 1, 0)
|
142
|
+
df['SA'] = np.where((df['event_type'].isin(['shot-on-goal','goal'])) & (df['event_team_abbr'] == df[opp_col]), 1, 0)
|
137
143
|
df['FF'] = np.where((df['event_type'].isin(fenwick_events)) & (df['event_team_abbr'] == df[team_col]), 1, 0)
|
138
144
|
df['FA'] = np.where((df['event_type'].isin(fenwick_events)) & (df['event_team_abbr'] == df[opp_col]), 1, 0)
|
139
145
|
df['CF'] = np.where((df['event_type'].isin(fenwick_events+['blocked-shot'])) & (df['event_team_abbr'] == df[team_col]), 1, 0)
|
@@ -149,6 +155,8 @@ def calc_onice(pbp,game_strength,second_group):
|
|
149
155
|
FA=('FA', 'sum'),
|
150
156
|
GF=('GF', 'sum'),
|
151
157
|
GA=('GA', 'sum'),
|
158
|
+
SF=('SF', 'sum'),
|
159
|
+
SA=('SA', 'sum'),
|
152
160
|
xGF=('xGF', 'sum'),
|
153
161
|
xGA=('xGA', 'sum'),
|
154
162
|
CF=('CF','sum'),
|
@@ -170,6 +178,8 @@ def calc_onice(pbp,game_strength,second_group):
|
|
170
178
|
FA=('FA', 'sum'),
|
171
179
|
GF=('GF', 'sum'),
|
172
180
|
GA=('GA', 'sum'),
|
181
|
+
SF=('SF', 'sum'),
|
182
|
+
SA=('SA', 'sum'),
|
173
183
|
xGF=('xGF', 'sum'),
|
174
184
|
xGA=('xGA', 'sum'),
|
175
185
|
CF=('CF','sum'),
|
@@ -179,15 +189,18 @@ def calc_onice(pbp,game_strength,second_group):
|
|
179
189
|
DZF=('DZF','sum')
|
180
190
|
).reset_index()
|
181
191
|
|
192
|
+
onice_stats['ShF%'] = onice_stats['GF']/onice_stats['SF']
|
182
193
|
onice_stats['xGF/FF'] = onice_stats['xGF']/onice_stats['FF']
|
183
194
|
onice_stats['GF/xGF'] = onice_stats['GF']/onice_stats['xGF']
|
184
195
|
onice_stats['FshF%'] = onice_stats['GF']/onice_stats['FF']
|
196
|
+
onice_stats['ShA%'] = onice_stats['GA']/onice_stats['SA']
|
185
197
|
onice_stats['xGA/FA'] = onice_stats['xGA']/onice_stats['FA']
|
186
198
|
onice_stats['GA/xGA'] = onice_stats['GA']/onice_stats['xGA']
|
187
199
|
onice_stats['FshA%'] = onice_stats['GA']/onice_stats['FA']
|
188
200
|
onice_stats['OZF%'] = onice_stats['OZF']/(onice_stats['OZF']+onice_stats['NZF']+onice_stats['DZF'])
|
189
201
|
onice_stats['NZF%'] = onice_stats['NZF']/(onice_stats['OZF']+onice_stats['NZF']+onice_stats['DZF'])
|
190
202
|
onice_stats['DZF%'] = onice_stats['DZF']/(onice_stats['OZF']+onice_stats['NZF']+onice_stats['DZF'])
|
203
|
+
onice_stats['GSAx'] = onice_stats['xGA']-onice_stats['GA']
|
191
204
|
|
192
205
|
return onice_stats
|
193
206
|
|
@@ -206,6 +219,8 @@ def calc_team(pbp,game_strength,second_group):
|
|
206
219
|
pbp['xGA'] = np.where(pbp['event_team_abbr'] == pbp[f'{team[1]}_team_abbr'], pbp['xG'], 0)
|
207
220
|
pbp['GF'] = np.where((pbp['event_type'] == "goal") & (pbp['event_team_abbr'] == pbp[f'{team[0]}_team_abbr']), 1, 0)
|
208
221
|
pbp['GA'] = np.where((pbp['event_type'] == "goal") & (pbp['event_team_abbr'] == pbp[f'{team[1]}_team_abbr']), 1, 0)
|
222
|
+
pbp['SF'] = np.where((pbp['event_type'].isin(['shot-on-goal','goal'])) & (pbp['event_team_abbr'] == pbp[f'{team[0]}_team_abbr']), 1, 0)
|
223
|
+
pbp['SA'] = np.where((pbp['event_type'].isin(['shot-on-goal','goal'])) & (pbp['event_team_abbr'] == pbp[f'{team[1]}_team_abbr']), 1, 0)
|
209
224
|
pbp['FF'] = np.where((pbp['event_type'].isin(fenwick_events)) & (pbp['event_team_abbr'] == pbp[f'{team[0]}_team_abbr']), 1, 0)
|
210
225
|
pbp['FA'] = np.where((pbp['event_type'].isin(fenwick_events)) & (pbp['event_team_abbr'] == pbp[f'{team[1]}_team_abbr']), 1, 0)
|
211
226
|
pbp['CF'] = np.where((pbp['event_type'].isin(fenwick_events+['blocked-shot'])) & (pbp['event_team_abbr'] == pbp[f'{team[0]}_team_abbr']), 1, 0)
|
@@ -234,6 +249,8 @@ def calc_team(pbp,game_strength,second_group):
|
|
234
249
|
FA=('FA', 'sum'),
|
235
250
|
GF=('GF', 'sum'),
|
236
251
|
GA=('GA', 'sum'),
|
252
|
+
SF=('SF','sum'),
|
253
|
+
SA=('SA','sum'),
|
237
254
|
xGF=('xGF', 'sum'),
|
238
255
|
xGA=('xGA', 'sum'),
|
239
256
|
CF=('CF','sum'),
|
@@ -264,6 +281,8 @@ def calc_team(pbp,game_strength,second_group):
|
|
264
281
|
FA=('FA', 'sum'),
|
265
282
|
GF=('GF', 'sum'),
|
266
283
|
GA=('GA', 'sum'),
|
284
|
+
SF=('SF','sum'),
|
285
|
+
SA=('SA','sum'),
|
267
286
|
xGF=('xGF', 'sum'),
|
268
287
|
xGA=('xGA', 'sum'),
|
269
288
|
CF=('CF','sum'),
|
@@ -286,15 +305,18 @@ def calc_team(pbp,game_strength,second_group):
|
|
286
305
|
RushAG=('RushAG','sum'),
|
287
306
|
).reset_index()
|
288
307
|
|
308
|
+
onice_stats['ShF%'] = onice_stats['GF']/onice_stats['SF']
|
289
309
|
onice_stats['xGF/FF'] = onice_stats['xGF']/onice_stats['FF']
|
290
310
|
onice_stats['GF/xGF'] = onice_stats['GF']/onice_stats['xGF']
|
291
311
|
onice_stats['FshF%'] = onice_stats['GF']/onice_stats['FF']
|
312
|
+
onice_stats['ShA%'] = onice_stats['GA']/onice_stats['SA']
|
292
313
|
onice_stats['xGA/FA'] = onice_stats['xGA']/onice_stats['FA']
|
293
314
|
onice_stats['GA/xGA'] = onice_stats['GA']/onice_stats['xGA']
|
294
315
|
onice_stats['FshA%'] = onice_stats['GA']/onice_stats['FA']
|
295
316
|
onice_stats['PM%'] = onice_stats['Take']/(onice_stats['Give']+onice_stats['Take'])
|
296
317
|
onice_stats['HF%'] = onice_stats['HF']/(onice_stats['HF']+onice_stats['HA'])
|
297
318
|
onice_stats['PENL%'] = onice_stats['Draw']/(onice_stats['Draw']+onice_stats['Penl'])
|
319
|
+
onice_stats['GSAx'] = onice_stats['xGA']-onice_stats['GA']
|
298
320
|
|
299
321
|
return onice_stats
|
300
322
|
|
@@ -313,6 +335,8 @@ def calc_goalie(pbp,game_strength,second_group):
|
|
313
335
|
pbp['xGA'] = np.where(pbp['event_team_abbr'] == pbp[f'{team[1]}_team_abbr'], pbp['xG'], 0)
|
314
336
|
pbp['GF'] = np.where((pbp['event_type'] == "goal") & (pbp['event_team_abbr'] == pbp[f'{team[0]}_team_abbr']), 1, 0)
|
315
337
|
pbp['GA'] = np.where((pbp['event_type'] == "goal") & (pbp['event_team_abbr'] == pbp[f'{team[1]}_team_abbr']), 1, 0)
|
338
|
+
pbp['SF'] = np.where((pbp['event_type'].isin(['shot-on-goal','goal'])) & (pbp['event_team_abbr'] == pbp[f'{team[0]}_team_abbr']), 1, 0)
|
339
|
+
pbp['SA'] = np.where((pbp['event_type'].isin(['shot-on-goal','goal'])) & (pbp['event_team_abbr'] == pbp[f'{team[1]}_team_abbr']), 1, 0)
|
316
340
|
pbp['FF'] = np.where((pbp['event_type'].isin(fenwick_events)) & (pbp['event_team_abbr'] == pbp[f'{team[0]}_team_abbr']), 1, 0)
|
317
341
|
pbp['FA'] = np.where((pbp['event_type'].isin(fenwick_events)) & (pbp['event_team_abbr'] == pbp[f'{team[1]}_team_abbr']), 1, 0)
|
318
342
|
pbp['CF'] = np.where((pbp['event_type'].isin(fenwick_events+['blocked-shot'])) & (pbp['event_team_abbr'] == pbp[f'{team[0]}_team_abbr']), 1, 0)
|
@@ -331,6 +355,8 @@ def calc_goalie(pbp,game_strength,second_group):
|
|
331
355
|
FA=('FA', 'sum'),
|
332
356
|
GF=('GF', 'sum'),
|
333
357
|
GA=('GA', 'sum'),
|
358
|
+
SF=('SF', 'sum'),
|
359
|
+
SA=('SA', 'sum'),
|
334
360
|
xGF=('xGF', 'sum'),
|
335
361
|
xGA=('xGA', 'sum'),
|
336
362
|
CF=('CF','sum'),
|
@@ -351,6 +377,8 @@ def calc_goalie(pbp,game_strength,second_group):
|
|
351
377
|
FA=('FA', 'sum'),
|
352
378
|
GF=('GF', 'sum'),
|
353
379
|
GA=('GA', 'sum'),
|
380
|
+
SF=('SF', 'sum'),
|
381
|
+
SA=('SA', 'sum'),
|
354
382
|
xGF=('xGF', 'sum'),
|
355
383
|
xGA=('xGA', 'sum'),
|
356
384
|
CF=('CF','sum'),
|
@@ -363,9 +391,11 @@ def calc_goalie(pbp,game_strength,second_group):
|
|
363
391
|
RushAG=('RushAG','sum'),
|
364
392
|
).reset_index()
|
365
393
|
|
394
|
+
onice_stats['ShF%'] = onice_stats['GF']/onice_stats['SF']
|
366
395
|
onice_stats['xGF/FF'] = onice_stats['xGF']/onice_stats['FF']
|
367
396
|
onice_stats['GF/xGF'] = onice_stats['GF']/onice_stats['xGF']
|
368
397
|
onice_stats['FshF%'] = onice_stats['GF']/onice_stats['FF']
|
398
|
+
onice_stats['ShA%'] = onice_stats['GA']/onice_stats['SA']
|
369
399
|
onice_stats['xGA/FA'] = onice_stats['xGA']/onice_stats['FA']
|
370
400
|
onice_stats['GA/xGA'] = onice_stats['GA']/onice_stats['xGA']
|
371
401
|
onice_stats['FshA%'] = onice_stats['GA']/onice_stats['FA']
|
wsba_hockey/tools/scraping.py
CHANGED
@@ -533,6 +533,171 @@ def parse_html(info):
|
|
533
533
|
#Return: parsed HTML pbp
|
534
534
|
return data
|
535
535
|
|
536
|
+
### ESPN SCRAPING FUNCTIONS ###
|
537
|
+
def espn_game_id(date,away,home):
    #Given a date formatted as YYYY-MM-DD and the away and home team abbreviations, return the game id from the ESPN schedule
    #Raises IndexError if no game on that date matches both teams
    date = date.replace("-","")

    #Retrieve data (the timeout keeps a stalled connection from hanging the scrape; HTTP errors are raised before parsing)
    api = f"https://site.api.espn.com/apis/site/v2/sports/hockey/nhl/scoreboard?dates={date}"
    response = rs.get(api, timeout=30)
    response.raise_for_status()
    schedule = pd.json_normalize(response.json()['events'])

    #Create team abbreviations from the first and last three characters of the short name
    #Only these two series are modified as necessary, leaving every other schedule column untouched
    abbr_fixes = {
        "LA":"LAK",
        "NJ":"NJD",
        "SJ":"SJS",
        "TB":"TBL",
    }
    away_team_abbr = schedule['shortName'].str[:3].str.strip(" ").replace(abbr_fixes)
    home_team_abbr = schedule['shortName'].str[-3:].str.strip(" ").replace(abbr_fixes)

    #Retrieve game id
    game_ids = schedule.loc[(away_team_abbr==away)&(home_team_abbr==home),'id'].tolist()
    if not game_ids:
        raise IndexError(f"No ESPN game found on {date} for {away} at {home}")

    #Return: ESPN game id
    return game_ids[0]
|
563
|
+
|
564
|
+
def parse_espn(date,away,home):
    #Given a date formatted as YYYY-MM-DD and the away and home team abbreviations, return game events from the ESPN play-by-play page
    game_id = espn_game_id(date,away,home)
    url = f'https://www.espn.com/nhl/playbyplay/_/gameId/{game_id}'

    #Code modified from Patrick Bacon

    #Retrieve game events as json (they are embedded in the text of the page)
    page = rs.get(url, headers={'User-Agent': 'Mozilla/5.0'}, timeout = 500)
    soup = BeautifulSoup(page.content.decode('ISO-8859-1'), 'lxml', multi_valued_attributes = None)
    page_text = str(soup)
    play_groups = json_lib.loads(page_text.split('"playGrps":')[1].split(',"tms"')[0])

    #DataFrame of time-related info for events (one group of plays per entry, combined in a single concat)
    clock_df = pd.concat([pd.DataFrame(group) for group in play_groups]) if play_groups else pd.DataFrame()
    clock_df = clock_df[~pd.isna(clock_df.clock)]

    # Needed to add .split(',"st":3')[0] for playoffs

    #DataFrame of coordinates for events
    coords_df = pd.DataFrame(json_lib.loads(page_text.split('plays":')[1].split(',"st":1')[0].split(',"st":2')[0].split(',"st":3')[0]))

    clock_df = clock_df.assign(
        clock = clock_df.clock.apply(lambda x: x['displayValue'])
    )

    #Events without a coordinate are left as NaN in coords_x and coords_y
    located = coords_df[~pd.isna(coords_df.coordinate)].coordinate
    coords_df = coords_df.assign(
        coords_x = located.apply(lambda point: point['x']).astype(int),
        coords_y = located.apply(lambda point: point['y']).astype(int),
    )

    #Combine
    espn_events = coords_df.merge(clock_df.loc[:, ['id', 'clock']])

    clock_parts = espn_events['clock'].str.split(':')
    espn_events = espn_events.assign(
        period = espn_events['period'].apply(lambda x: x['number']),
        minutes = clock_parts.apply(lambda x: x[0]).astype(int),
        seconds = clock_parts.apply(lambda x: x[1]).astype(int),
        event_type = espn_events['type'].apply(lambda x: x['txt'])
    )

    #Faceoffs without coordinates are placed at (0, 0); any other event without coordinates is dropped
    unplaced_faceoff = (pd.isna(espn_events.coords_x)) & (pd.isna(espn_events.coords_y)) & (espn_events.event_type=='Face Off')
    espn_events = espn_events.assign(
        coords_x = np.where(unplaced_faceoff, 0, espn_events.coords_x),
        coords_y = np.where(unplaced_faceoff, 0, espn_events.coords_y)
    )

    espn_events = espn_events[(~pd.isna(espn_events.coords_x)) & (~pd.isna(espn_events.coords_y))]

    espn_events = espn_events.assign(
        coords_x = espn_events.coords_x.astype(int),
        coords_y = espn_events.coords_y.astype(int)
    )

    #Rename events
    #The turnover event includes just one player in the event information, meaning takeaways will have no coordinates for play-by-plays created by ESPN scraping
    espn_events['event_type'] = espn_events['event_type'].replace({
        "Face Off":'faceoff',
        "Hit":'hit',
        "Shot":'shot-on-goal',
        "Missed":'missed-shot',
        "Blocked":'blocked-shot',
        "Goal":'goal',
        "Delayed Penalty":'delayed-penalty',
        "Penalty":'penalty',
    })

    #Period time adjustments
    #The parsed minutes and seconds are reused instead of slicing the clock text a second time
    espn_events['period_time_simple'] = espn_events['clock'].str.replace(":","",regex=False)
    espn_events['period_seconds_elapsed'] = (espn_events['minutes']*60)+espn_events['seconds']
    espn_events['seconds_elapsed'] = ((espn_events['period']-1)*1200)+espn_events['period_seconds_elapsed']

    espn_events = espn_events.rename(columns = {'text':'description'})

    #Add event team
    espn_events['event_team_abbr'] = espn_events['homeAway'].replace({
        "away":away,
        "home":home
    })

    #Some games (mostly preseason and all star games) do not include coordinates.
    try:
        espn_events['x_fixed'] = abs(espn_events['coords_x'])
        espn_events['y_fixed'] = np.where(espn_events['coords_x']<0,-espn_events['coords_y'],espn_events['coords_y'])
        espn_events['x_adj'] = np.where(espn_events['homeAway']=="home",espn_events['x_fixed'],-espn_events['x_fixed'])
        espn_events['y_adj'] = np.where(espn_events['homeAway']=="home",espn_events['y_fixed'],-espn_events['y_fixed'])
        espn_events['event_distance'] = np.sqrt(((89 - espn_events['x_fixed'])**2) + (espn_events['y_fixed']**2))
        espn_events['event_angle'] = np.degrees(np.arctan2(abs(espn_events['y_fixed']), abs(89 - espn_events['x_fixed'])))
    except TypeError:
        print("No coordinates found for ESPN game...")

        espn_events['x_fixed'] = np.nan
        espn_events['y_fixed'] = np.nan
        espn_events['x_adj'] = np.nan
        espn_events['y_adj'] = np.nan
        espn_events['event_distance'] = np.nan
        espn_events['event_angle'] = np.nan

    #Assign score and fenwick for each event as running totals that include the event itself
    #Any event not marked "home" counts for the away team
    fenwick_events = ['missed-shot','shot-on-goal','goal']
    is_home = espn_events['homeAway'] == "home"
    is_goal = espn_events['event_type'] == 'goal'
    is_fenwick = espn_events['event_type'].isin(fenwick_events)

    espn_events['away_score'] = (is_goal & ~is_home).cumsum()
    espn_events['home_score'] = (is_goal & is_home).cumsum()
    espn_events['away_fenwick'] = (is_fenwick & ~is_home).cumsum()
    espn_events['home_fenwick'] = (is_fenwick & is_home).cumsum()

    #Return: play-by-play events in supplied game from ESPN
    return espn_events
|
700
|
+
|
536
701
|
def assign_target(data):
|
537
702
|
#Assign target number to plays to assist with merging
|
538
703
|
|
@@ -554,12 +719,11 @@ def combine_pbp(info,sources):
|
|
554
719
|
#Route data combining - json if season is after 2009-2010:
|
555
720
|
if str(info['season']) in ['20052006','20062007','20072008','20082009','20092010']:
|
556
721
|
#ESPN x HTML
|
557
|
-
|
558
|
-
|
722
|
+
espn_pbp = parse_espn(str(info['game_date']),info['away_team_abbr'],info['home_team_abbr']).rename(columns={'coords_x':'x',"coords_y":'y'}).sort_values(['period','seconds_elapsed']).reset_index()
|
723
|
+
merge_col = ['period','seconds_elapsed','event_type','event_team_abbr']
|
559
724
|
|
560
725
|
#Merge pbp
|
561
|
-
|
562
|
-
print('In-repair, please try again later...')
|
726
|
+
df = pd.merge(html_pbp,espn_pbp,how='left',on=merge_col)
|
563
727
|
|
564
728
|
else:
|
565
729
|
#JSON x HTML
|
wsba_hockey/workspace.py
CHANGED
@@ -6,7 +6,9 @@ import numpy as np
|
|
6
6
|
|
7
7
|
season_load = wsba.repo_load_seasons()
|
8
8
|
|
9
|
-
select = season_load[
|
9
|
+
select = season_load[0:3]
|
10
|
+
|
11
|
+
data.pbp(select)
|
10
12
|
|
11
13
|
#pbp = data.load_pbp_db(select)
|
12
14
|
|
@@ -26,26 +28,4 @@ select = season_load[9:17]
|
|
26
28
|
#data.fix_names(['skater','goalie'],select)
|
27
29
|
|
28
30
|
## DATA EXPORT ##
|
29
|
-
#data.push_to_sheet(select,['skaters','team','info'])
|
30
|
-
|
31
|
-
wsba.nhl_scrape_game(['2024020008'],remove=[]).to_csv('wtfwhy.csv',index=False)
|
32
|
-
|
33
|
-
pbp = pd.read_parquet('pbp/parquet/nhl_pbp_20242025.parquet')
|
34
|
-
helle = pbp.loc[pbp['event_goalie_id']==8476945,
|
35
|
-
['game_id','period','seconds_elapsed',
|
36
|
-
'strength_state','event_type','description',
|
37
|
-
'event_goalie_id','x','y','xG']]
|
38
|
-
mp = pd.read_csv('shots_2024.csv')
|
39
|
-
goalie = mp.loc[mp['goalieIdForShot']==8476945,
|
40
|
-
['game_id','period','time','event','goalieIdForShot',
|
41
|
-
'xCord','yCord','xGoal']].replace({
|
42
|
-
'SHOT':'shot-on-goal',
|
43
|
-
'MISS':'missed-shot',
|
44
|
-
'GOAL':'goal'
|
45
|
-
})
|
46
|
-
|
47
|
-
helle.to_csv('hellebuyck.csv',index=False)
|
48
|
-
helle['game_id'] = helle['game_id'].astype(str)
|
49
|
-
goalie['game_id'] = ('20240'+goalie['game_id'].astype(str))
|
50
|
-
pd.merge(helle,goalie,how='left',left_on=['game_id','period','seconds_elapsed','event_type','x','y'],right_on=['game_id','period','time','event','xCord','yCord']).to_csv('test.csv',index=False)
|
51
|
-
|
31
|
+
#data.push_to_sheet(select,['skaters','team','info'])
|