wsba-hockey 1.2.1__tar.gz → 1.2.3__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {wsba_hockey-1.2.1/src/wsba_hockey.egg-info → wsba_hockey-1.2.3}/PKG-INFO +1 -1
- {wsba_hockey-1.2.1 → wsba_hockey-1.2.3}/pyproject.toml +1 -1
- {wsba_hockey-1.2.1 → wsba_hockey-1.2.3}/src/wsba_hockey/__init__.py +2 -0
- wsba_hockey-1.2.3/src/wsba_hockey/tools/columns.py +281 -0
- wsba_hockey-1.2.3/src/wsba_hockey/tools/game_pred.py +33 -0
- {wsba_hockey-1.2.1 → wsba_hockey-1.2.3}/src/wsba_hockey/tools/plotting.py +5 -5
- {wsba_hockey-1.2.1 → wsba_hockey-1.2.3}/src/wsba_hockey/tools/xg_model.py +6 -6
- {wsba_hockey-1.2.1 → wsba_hockey-1.2.3}/src/wsba_hockey/wsba_main.py +165 -94
- {wsba_hockey-1.2.1 → wsba_hockey-1.2.3/src/wsba_hockey.egg-info}/PKG-INFO +1 -1
- {wsba_hockey-1.2.1 → wsba_hockey-1.2.3}/src/wsba_hockey.egg-info/SOURCES.txt +2 -0
- {wsba_hockey-1.2.1 → wsba_hockey-1.2.3}/LICENSE +0 -0
- {wsba_hockey-1.2.1 → wsba_hockey-1.2.3}/README.md +0 -0
- {wsba_hockey-1.2.1 → wsba_hockey-1.2.3}/setup.cfg +0 -0
- {wsba_hockey-1.2.1 → wsba_hockey-1.2.3}/src/wsba_hockey/tools/__init__.py +0 -0
- {wsba_hockey-1.2.1 → wsba_hockey-1.2.3}/src/wsba_hockey/tools/agg.py +0 -0
- {wsba_hockey-1.2.1 → wsba_hockey-1.2.3}/src/wsba_hockey/tools/archive/old_scraping.py +0 -0
- {wsba_hockey-1.2.1 → wsba_hockey-1.2.3}/src/wsba_hockey/tools/scraping.py +0 -0
- {wsba_hockey-1.2.1 → wsba_hockey-1.2.3}/src/wsba_hockey/tools/utils/__init__.py +0 -0
- {wsba_hockey-1.2.1 → wsba_hockey-1.2.3}/src/wsba_hockey/tools/utils/shared.py +0 -0
- {wsba_hockey-1.2.1 → wsba_hockey-1.2.3}/src/wsba_hockey.egg-info/dependency_links.txt +0 -0
- {wsba_hockey-1.2.1 → wsba_hockey-1.2.3}/src/wsba_hockey.egg-info/top_level.txt +0 -0
- {wsba_hockey-1.2.1 → wsba_hockey-1.2.3}/tests/tests.py +0 -0
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.4
|
2
2
|
Name: wsba_hockey
|
3
|
-
Version: 1.2.
|
3
|
+
Version: 1.2.3
|
4
4
|
Summary: WeakSide Breakout's complete Python package of access to hockey data, primarily including the scraping of National Hockey League schedule, play-by-play, and shifts information.
|
5
5
|
Author-email: Owen Singh <owenbksingh@gmail.com>
|
6
6
|
Project-URL: Homepage, https://github.com/owensingh38/wsba_hockey/
|
@@ -0,0 +1,281 @@
|
|
1
|
+
### COLUMN NAMES ###
|
2
|
+
# Provided in this file are dicts to standardize column names as necessary #
|
3
|
+
|
4
|
+
def col_map():
|
5
|
+
return {
|
6
|
+
'schedule':{
|
7
|
+
'id':'game_id',
|
8
|
+
'game_date':'game_date',
|
9
|
+
'gameType':'season_type',
|
10
|
+
'neutralSite':'neutral_site',
|
11
|
+
'startTimeUTC':'start_time_utc',
|
12
|
+
'start_time_est':'start_time_est',
|
13
|
+
'easternUTCOffset':'eastern_utc',
|
14
|
+
'venueUTCOffset':'venue_utc',
|
15
|
+
'venueTimezone':'venue_timezone',
|
16
|
+
'gameState':'game_state',
|
17
|
+
'gameScheduleState':'game_schedule_state',
|
18
|
+
'tvBroadcasts':'tv_broadcasts',
|
19
|
+
'threeMinRecap':'three_min_recap',
|
20
|
+
'threeMinRecapFr':'three_min_recap_fr',
|
21
|
+
'condensedGame':'condensed_game',
|
22
|
+
'condensedGameFr':'condensed_game_fr',
|
23
|
+
'gameCenterLink':'gamecenter_link',
|
24
|
+
'venue.default':'venue',
|
25
|
+
'awayTeam.id':'away_team_id',
|
26
|
+
'homeTeam.id':'home_team_id',
|
27
|
+
'awayTeam.abbrev':'away_team_abbr',
|
28
|
+
'homeTeam.abbrev':'home_team_abbr',
|
29
|
+
'awayTeam.darkLogo':'away_team_logo',
|
30
|
+
'homeTeam.darkLogo':'home_team_logo',
|
31
|
+
'awayTeam.awaySplitSquad':'away_team_split_squad',
|
32
|
+
'homeTeam.homeSplitSquad':'home_team_split_squad',
|
33
|
+
'awayTeam.score':'away_score',
|
34
|
+
'homeTeam.score':'home_score',
|
35
|
+
'periodDescriptor.number':'period',
|
36
|
+
'periodDescriptor.maxRegulationPeriods':'period_max_regulation',
|
37
|
+
'periodDescriptor.periodType':'period_type',
|
38
|
+
'gameOutcome.lastPeriodType':'period_type_last',
|
39
|
+
'seriesUrl':'series_url',
|
40
|
+
'seriesStatus.round':'series_round',
|
41
|
+
'seriesStatus.seriesAbbrev':'series_abbr',
|
42
|
+
'seriesStatus.seriesTitle':'series_name',
|
43
|
+
'seriesStatus.seriesLetter':'series_letter',
|
44
|
+
'seriesStatus.neededToWin':'series_games_needed_to_win',
|
45
|
+
'seriesStatus.gameNumberOfSeries':'series_game_num',
|
46
|
+
'seriesStatus.topSeedTeamAbbrev':'top_seed_team_abbr',
|
47
|
+
'seriesStatus.bottomSeedTeamAbbrev':'bottom_seed_team_abbr',
|
48
|
+
'seriesStatus.topSeedWins':'top_seed_wins',
|
49
|
+
'seriesStatus.bottomSeedWins':'bottom_seed_wins'
|
50
|
+
},
|
51
|
+
'season_info':{
|
52
|
+
'id':'season',
|
53
|
+
'formattedSeasonId':'season_name',
|
54
|
+
'seasonOrdinal':'season_ordinal',
|
55
|
+
'startDate':'start_date',
|
56
|
+
'endDate':'end_date',
|
57
|
+
'numberOfGames':'games_num',
|
58
|
+
'totalRegularSeasonGames':'regular_season_total_games_num',
|
59
|
+
'totalPlayoffGames':'playoffs_total_games_num',
|
60
|
+
'regularSeasonStartDate':'regular_season_start_date',
|
61
|
+
'regularSeasonEndDate':'regular_season_end_date',
|
62
|
+
'standingsStart':'standings_start_date',
|
63
|
+
'standingsEnd':'standings_end_date',
|
64
|
+
'allStarGameInUse':'all_star_game',
|
65
|
+
'conferencesInUse':'conferences',
|
66
|
+
'divisionsInUse':'divisions',
|
67
|
+
'wildcardInUse':'wildcard',
|
68
|
+
'entryDraftInUse':'entry_draft',
|
69
|
+
'nhlStanleyCupOwner':'nhl_stanley_cup_owner',
|
70
|
+
'olympicsParticipation':'olympic_participation',
|
71
|
+
'tiesInUse':'ties',
|
72
|
+
'pointForOTLossInUse':'ot_loss_point',
|
73
|
+
'regulationWinsInUse':'reg_wins',
|
74
|
+
'rowInUse':'row',
|
75
|
+
'supplementalDraftInUse':'supplemental_draft'
|
76
|
+
},
|
77
|
+
'standings':{
|
78
|
+
"clinchIndicator": "clinch_indicator",
|
79
|
+
"conferenceAbbrev": "conference_abbr",
|
80
|
+
"conferenceHomeSequence": "conference_home_sequence",
|
81
|
+
"conferenceL10Sequence": "conference_l10_sequence",
|
82
|
+
"conferenceName": "conference_name",
|
83
|
+
"conferenceRoadSequence": "conference_road_sequence",
|
84
|
+
"conferenceSequence": "conference_sequence",
|
85
|
+
"date": "date",
|
86
|
+
"divisionAbbrev": "division_abbr",
|
87
|
+
"divisionHomeSequence": "division_home_sequence",
|
88
|
+
"divisionL10Sequence": "division_l10_sequence",
|
89
|
+
"divisionName": "division_name",
|
90
|
+
"divisionRoadSequence": "division_road_sequence",
|
91
|
+
"divisionSequence": "division_sequence",
|
92
|
+
"gameTypeId": "game_type_id",
|
93
|
+
"gamesPlayed": "GP",
|
94
|
+
"goalDifferential": "GD",
|
95
|
+
"goalDifferentialPctg": "GD%",
|
96
|
+
"goalAgainst": "GA",
|
97
|
+
"goalFor": "GF",
|
98
|
+
"goalsForPctg": "GF%",
|
99
|
+
"homeGamesPlayed": "HGP",
|
100
|
+
"homeGoalDifferential": "HGD",
|
101
|
+
"homeGoalsAgainst": "HGA",
|
102
|
+
"homeGoalsFor": "HGF",
|
103
|
+
"homeLosses": "HL",
|
104
|
+
"homeOtLosses": "HOL",
|
105
|
+
"homePoints": "HPTS",
|
106
|
+
"homeRegulationPlusOtWins": "HROW",
|
107
|
+
"homeRegulationWins": "HRW",
|
108
|
+
"homeTies": "HT",
|
109
|
+
"homeWins": "HW",
|
110
|
+
"l10GamesPlayed": "l10_GP",
|
111
|
+
"l10GoalDifferential": "l10_GD",
|
112
|
+
"l10GoalsAgainst": "l10_GA",
|
113
|
+
"l10GoalsFor": "l10_GF",
|
114
|
+
"l10Losses": "l10_L",
|
115
|
+
"l10OtLosses": "l10_OTL",
|
116
|
+
"l10Points": "l10_PTS",
|
117
|
+
"l10RegulationPlusOtWins": "l10_ROW",
|
118
|
+
"l10RegulationWins": "l10_RW",
|
119
|
+
"l10Ties": "l10_T",
|
120
|
+
"l10Wins": "l10_W",
|
121
|
+
"leagueHomeSequence": "league_home_sequence",
|
122
|
+
"leagueL10Sequence": "league_l10_sequence",
|
123
|
+
"leagueRoadSequence": "league_road_sequence",
|
124
|
+
"leagueSequence": "league_sequence",
|
125
|
+
"losses": "L",
|
126
|
+
"otLosses": "OTL",
|
127
|
+
"pointPctg": "PTS%",
|
128
|
+
"points": "PTS",
|
129
|
+
"regulationPlusOtWinPctg": "ROW%",
|
130
|
+
"regulationPlusOtWins": "ROW",
|
131
|
+
"regulationWinPctg": "RW%",
|
132
|
+
"regulationWins": "RW",
|
133
|
+
"roadGamesPlayed": "AGP",
|
134
|
+
"roadGoalDifferential": "AGD",
|
135
|
+
"roadGoalsAgainst": "AGA",
|
136
|
+
"roadGoalsFor": "AGF",
|
137
|
+
"roadLosses": "AL",
|
138
|
+
"roadOtLosses": "AOTL",
|
139
|
+
"roadPoints": "APTS",
|
140
|
+
"roadRegulationPlusOtWins": "AROW",
|
141
|
+
"roadRegulationWins": "ARW",
|
142
|
+
"roadTies": "AT",
|
143
|
+
"roadWins": "AW",
|
144
|
+
"seasonId": "season",
|
145
|
+
"shootoutLosses": "SOL",
|
146
|
+
"shootoutWins": "SOW",
|
147
|
+
"streakCode": "streak_code",
|
148
|
+
"streakCount": "streak_count",
|
149
|
+
"teamLogo": "team_logo",
|
150
|
+
"ties": "T",
|
151
|
+
"waiversSequence": "waivers_sequence",
|
152
|
+
"wildcardSequence": "wildcard_sequence",
|
153
|
+
"winPctg": "W%",
|
154
|
+
"wins": "W",
|
155
|
+
"placeName.default": "place_name",
|
156
|
+
"teamName.default": "team_name",
|
157
|
+
"teamCommonName.default": "team_common_name",
|
158
|
+
"teamAbbrev.default": "team_abbr"
|
159
|
+
},
|
160
|
+
'roster':{
|
161
|
+
"id": "player_id",
|
162
|
+
"team_abbr": "team_abbr",
|
163
|
+
"season": "season",
|
164
|
+
"headshot": "headshot",
|
165
|
+
"sweaterNumber": "sweater_number",
|
166
|
+
"positionCode": "position",
|
167
|
+
"heading_position": "heading_position",
|
168
|
+
"shootsCatches": "handedness",
|
169
|
+
"heightInInches": "height_in",
|
170
|
+
"weightInPounds": "weight_lbs",
|
171
|
+
"heightInCentimeters": "height_cm",
|
172
|
+
"weightInKilograms": "weight_kg",
|
173
|
+
"birthDate": "birth_date",
|
174
|
+
"birthCountry": "birth_country",
|
175
|
+
"player_name":"player_name",
|
176
|
+
"firstName.default": "player_first_name",
|
177
|
+
"lastName.default": "player_last_name",
|
178
|
+
"birthCity.default": "birth_city",
|
179
|
+
"birthStateProvince.default": "birth_state_province"
|
180
|
+
},
|
181
|
+
'prospects':{
|
182
|
+
"id": "player_id",
|
183
|
+
"headshot": "headshot",
|
184
|
+
"sweaterNumber": "sweater_number",
|
185
|
+
"positionCode": "position",
|
186
|
+
"shootsCatches": "handedness",
|
187
|
+
"heightInInches": "height_in",
|
188
|
+
"weightInPounds": "weight_lbs",
|
189
|
+
"heightInCentimeters": "height_cm",
|
190
|
+
"weightInKilograms": "weight_kg",
|
191
|
+
"birthDate": "birth_date",
|
192
|
+
"birthCountry": "birth_country",
|
193
|
+
"player_name":"player_name",
|
194
|
+
"firstName.default": "player_first_name",
|
195
|
+
"lastName.default": "player_last_name",
|
196
|
+
"birthCity.default": "birth_city",
|
197
|
+
"birthStateProvince.default": "birth_state_province",
|
198
|
+
},
|
199
|
+
'player_info':{
|
200
|
+
'playerId': 'player_id',
|
201
|
+
'player_name': 'player_name',
|
202
|
+
'isActive': 'is_active',
|
203
|
+
'currentTeamId': 'current_team_id',
|
204
|
+
'currentTeamAbbrev': 'current_team_abbr',
|
205
|
+
'badges': 'badges',
|
206
|
+
'teamLogo': 'team_logo',
|
207
|
+
'sweaterNumber': 'sweater_number',
|
208
|
+
'position': 'position',
|
209
|
+
'headshot': 'headshot',
|
210
|
+
'heroImage': 'hero_image',
|
211
|
+
'heightInInches': 'height_in',
|
212
|
+
'heightInCentimeters': 'height_cm',
|
213
|
+
'weightInPounds': 'weight_lbs',
|
214
|
+
'weightInKilograms': 'weight_kg',
|
215
|
+
'birthDate': 'birth_date',
|
216
|
+
'birthCountry': 'birth_country',
|
217
|
+
'shootsCatches': 'handedness',
|
218
|
+
'playerSlug': 'player_slug',
|
219
|
+
'inTop100AllTime': 'in_top_100_all_time',
|
220
|
+
'inHHOF': 'in_hhof',
|
221
|
+
'shopLink': 'shop_link',
|
222
|
+
'twitterLink': 'twitter_link',
|
223
|
+
'watchLink': 'watch_link',
|
224
|
+
'last5Games': 'last_5_games',
|
225
|
+
'seasonTotals': 'season_totals',
|
226
|
+
'awards': 'awards',
|
227
|
+
'currentTeamRoster': 'current_team_roster',
|
228
|
+
'fullTeamName.default': 'full_team_name',
|
229
|
+
'teamCommonName.default': 'team_common_name',
|
230
|
+
'teamPlaceNameWithPreposition.default': 'team_place_name_with_preposition',
|
231
|
+
'firstName.default': 'player_first_name',
|
232
|
+
'lastName.default': 'player_last_name',
|
233
|
+
'birthCity.default': 'birth_city',
|
234
|
+
'birthStateProvince.default': 'birth_state_province',
|
235
|
+
'draftDetails.year': 'draft_year',
|
236
|
+
'draftDetails.teamAbbrev': 'draft_team_abbr',
|
237
|
+
'draftDetails.round': 'draft_round',
|
238
|
+
'draftDetails.pickInRound': 'draft_pick_in_round',
|
239
|
+
'draftDetails.overallPick': 'draft_overall_pick',
|
240
|
+
},
|
241
|
+
'team_info':{
|
242
|
+
"id":"team_id",
|
243
|
+
"franchiseId":"franchise_id",
|
244
|
+
"fullName":"team_name",
|
245
|
+
"leagueId":"league_id",
|
246
|
+
"triCode":"team_abbr",
|
247
|
+
"logo_light":"logo_light",
|
248
|
+
"logo_dark":"logo_dark",
|
249
|
+
"country3Code":"country_abbr",
|
250
|
+
"countryCode":"country_abbr_2",
|
251
|
+
"countryName":"country_name",
|
252
|
+
"hasPlayerStats":"has_player_stats",
|
253
|
+
"imageUrl":"country_flag_large",
|
254
|
+
"isActive":"is_active",
|
255
|
+
"nationalityName":"nationality",
|
256
|
+
"olympicUrl":"olympic_url",
|
257
|
+
"thumbnailUrl":"thumbnail_url"
|
258
|
+
},
|
259
|
+
'draft_rankings':{
|
260
|
+
"id": "player_id",
|
261
|
+
"player_name":"player_name",
|
262
|
+
"firstName": "player_first_name",
|
263
|
+
"lastName": "player_last_name",
|
264
|
+
"headshot": "headshot",
|
265
|
+
"sweaterNumber": "sweater_number",
|
266
|
+
"positionCode": "position",
|
267
|
+
"shootsCatches": "handedness",
|
268
|
+
"heightInInches": "height_in",
|
269
|
+
"weightInPounds": "weight_lbs",
|
270
|
+
"heightInCentimeters": "height_cm",
|
271
|
+
"weightInKilograms": "weight_kg",
|
272
|
+
"birthDate": "birth_date",
|
273
|
+
"birthCity": "birth_city",
|
274
|
+
"birthCountry": "birth_country",
|
275
|
+
"birthStateProvince": "birth_state_province",
|
276
|
+
"lastAmateurClub": "last_amateur_club",
|
277
|
+
"lastAmateurLeague": "last_amateur_league",
|
278
|
+
"midtermRank":"midterm_rank",
|
279
|
+
"finalRank":"final_rank"
|
280
|
+
}
|
281
|
+
}
|
@@ -0,0 +1,33 @@
|
|
1
|
+
import joblib
|
2
|
+
import os
|
3
|
+
import pandas as pd
|
4
|
+
import numpy as np
|
5
|
+
import xgboost as xgb
|
6
|
+
import scipy.sparse as sp
|
7
|
+
import wsba_hockey.wsba_main as wsba
|
8
|
+
import wsba_hockey.tools.scraping as scraping
|
9
|
+
import matplotlib.pyplot as plt
|
10
|
+
from sklearn.calibration import calibration_curve
|
11
|
+
from sklearn.metrics import roc_curve, auc
|
12
|
+
|
13
|
+
### GAME PREDICTION MODEL FUNCTIONS ###
|
14
|
+
# Provided in this file are functions vital to the game prediction model in the WSBA Hockey Python package. #
|
15
|
+
|
16
|
+
## GLOBAL VARIABLES ##
|
17
|
+
dir = os.path.dirname(os.path.realpath(__file__))
|
18
|
+
roster_path = os.path.join(dir,'rosters\\nhl_rosters.csv')
|
19
|
+
schedule_path = os.path.join(dir,'schedule/schedule.csv')
|
20
|
+
|
21
|
+
def prep_game_data(pbp):
|
22
|
+
#Prepare schedule data for model development given full-season pbp
|
23
|
+
|
24
|
+
#Calculate necessary team stats (by game) for the prediction model
|
25
|
+
#The model will evaluate based on three different qualities for valid EV, PP, and SH strength
|
26
|
+
dfs = []
|
27
|
+
for strength in [['5v5'],['5v4'],['4v5']]:
|
28
|
+
team_games = wsba.nhl_calculate_stats(pbp,'team',[2,3],strength,True)
|
29
|
+
team_games['Year'] = team_games['Season'].str[0:4].astype(int)
|
30
|
+
dfs.append(team_games)
|
31
|
+
|
32
|
+
#Place the games in order and create sums for
|
33
|
+
df = pd.concat(dfs).sort_values(by=['Year','Game'])
|
@@ -50,7 +50,7 @@ def prep_plot_data(pbp,events,strengths,marker_dict=event_markers):
|
|
50
50
|
pbp = wsba_xG(pbp)
|
51
51
|
pbp['xG'] = np.where(pbp['xG'].isna(),0,pbp['xG'])
|
52
52
|
|
53
|
-
pbp['WSBA'] = pbp['
|
53
|
+
pbp['WSBA'] = pbp['event_player_1_id'].astype(str)+pbp['season'].astype(str)+pbp['event_team_abbr']
|
54
54
|
|
55
55
|
pbp['x_plot'] = np.where(pbp['x']<0,-pbp['y_adj'],pbp['y_adj'])
|
56
56
|
pbp['y_plot'] = abs(pbp['x_adj'])
|
@@ -88,8 +88,8 @@ def plot_skater_shots(pbp, player, season, team, strengths, title = None, marker
|
|
88
88
|
pbp = pbp.loc[(pbp['season'].astype(str)==season)&((pbp['away_team_abbr']==team)|(pbp['home_team_abbr']==team))]
|
89
89
|
|
90
90
|
team_data = pd.read_csv(info_path)
|
91
|
-
team_color = list(team_data.loc[team_data['WSBA']==f'{team}{season}','
|
92
|
-
team_color_2nd = list(team_data.loc[team_data['WSBA']==f'{team}{season}','
|
91
|
+
team_color = list(team_data.loc[team_data['WSBA']==f'{team}{season}','primary_color'])[0]
|
92
|
+
team_color_2nd = list(team_data.loc[team_data['WSBA']==f'{team}{season}','secondary_color'])[0]
|
93
93
|
|
94
94
|
if onice in ['for','against']:
|
95
95
|
skater = pbp.loc[(pbp[f'onice_{onice}'].str.contains(player.upper()))]
|
@@ -123,8 +123,8 @@ def plot_game_events(pbp,game_id,events,strengths,marker_dict=event_markers,team
|
|
123
123
|
|
124
124
|
team_data = pd.read_csv(info_path)
|
125
125
|
team_info ={
|
126
|
-
'away_color':'#000000' if list(team_data.loc[team_data['WSBA']==f'{away_abbr}{season}','
|
127
|
-
'home_color': list(team_data.loc[team_data['WSBA']==f'{home_abbr}{season}',f'{team_colors['home']
|
126
|
+
'away_color':'#000000' if list(team_data.loc[team_data['WSBA']==f'{away_abbr}{season}','secondary_color'])[0]=='#FFFFFF' else list(team_data.loc[team_data['WSBA']==f'{away_abbr}{season}',f'{team_colors['away']}_color'])[0],
|
127
|
+
'home_color': list(team_data.loc[team_data['WSBA']==f'{home_abbr}{season}',f'{team_colors['home']}_color'])[0],
|
128
128
|
'away_logo': f'tools/logos/png/{away_abbr}{season}.png',
|
129
129
|
'home_logo': f'tools/logos/png/{home_abbr}{season}.png',
|
130
130
|
}
|
@@ -90,18 +90,18 @@ def fix_players(pbp):
|
|
90
90
|
print('Adding player info to pbp...')
|
91
91
|
|
92
92
|
#Load roster and all players
|
93
|
-
roster = pd.read_csv(roster_path).drop_duplicates(['
|
93
|
+
roster = pd.read_csv(roster_path).drop_duplicates(['player_id'])[['player_name','player_id','handedness']]
|
94
94
|
|
95
95
|
#Some players are missing from the roster file (generally in newer seasons); add these manually
|
96
|
-
miss = list(pbp.loc[~(pbp['event_player_1_id'].isin(list(roster['
|
96
|
+
miss = list(pbp.loc[~(pbp['event_player_1_id'].isin(list(roster['player_id'])))&(pbp['event_player_1_id'].notna()),'event_player_1_id'].drop_duplicates())
|
97
97
|
if miss:
|
98
|
-
add = wsba.
|
98
|
+
add = wsba.nhl_scrape_player_info(miss)[['player_name','player_id','handedness']]
|
99
99
|
roster = pd.concat([roster,add]).reset_index(drop=True)
|
100
100
|
|
101
101
|
#Conversion dict
|
102
|
-
roster['
|
103
|
-
roster_dict = roster.set_index('
|
104
|
-
names_dict = roster.set_index('
|
102
|
+
roster['player_id'] = roster['player_id'].astype(str)
|
103
|
+
roster_dict = roster.set_index('player_id').to_dict()['handedness']
|
104
|
+
names_dict = roster.set_index('player_id').to_dict()['player_name']
|
105
105
|
|
106
106
|
#Add player names
|
107
107
|
for i in range(3):
|
@@ -11,6 +11,7 @@ from wsba_hockey.tools.scraping import *
|
|
11
11
|
from wsba_hockey.tools.xg_model import *
|
12
12
|
from wsba_hockey.tools.agg import *
|
13
13
|
from wsba_hockey.tools.plotting import *
|
14
|
+
from wsba_hockey.tools.columns import col_map
|
14
15
|
|
15
16
|
### WSBA HOCKEY ###
|
16
17
|
## Provided below are all integral functions in the WSBA Hockey Python package. ##
|
@@ -105,7 +106,7 @@ KNOWN_PROBS = {
|
|
105
106
|
|
106
107
|
SHOT_TYPES = ['wrist','deflected','tip-in','slap','backhand','snap','wrap-around','poke','bat','cradle','between-legs']
|
107
108
|
|
108
|
-
NEW =
|
109
|
+
NEW = 2025
|
109
110
|
|
110
111
|
EVENTS = ['faceoff','hit','giveaway','takeaway','blocked-shot','missed-shot','shot-on-goal','goal','penalty']
|
111
112
|
|
@@ -114,8 +115,19 @@ SCHEDULE_PATH = os.path.join(DIR,'tools\\schedule\\schedule.csv')
|
|
114
115
|
INFO_PATH = os.path.join(DIR,'tools\\teaminfo\\nhl_teaminfo.csv')
|
115
116
|
DEFAULT_ROSTER = os.path.join(DIR,'tools\\rosters\\nhl_rosters.csv')
|
116
117
|
|
118
|
+
#Load column names for standardization
|
119
|
+
COL_MAP = col_map()
|
120
|
+
|
121
|
+
DRAFT_CAT = {
|
122
|
+
0: 'All Prospects',
|
123
|
+
1: 'North American Skaters',
|
124
|
+
2: 'International Skater',
|
125
|
+
3: 'North American Goalies',
|
126
|
+
4: 'International Goalies'
|
127
|
+
}
|
128
|
+
|
117
129
|
## SCRAPE FUNCTIONS ##
|
118
|
-
def nhl_scrape_game(game_ids:list[int], split_shifts:bool = False, remove:list[str] = [], verbose:bool = False, sources:bool = False, errors:bool = False):
|
130
|
+
def nhl_scrape_game(game_ids:int | list[int], split_shifts:bool = False, remove:list[str] = [], verbose:bool = False, sources:bool = False, errors:bool = False):
|
119
131
|
"""
|
120
132
|
Given a set of game_ids (NHL API), return complete play-by-play information as requested.
|
121
133
|
|
@@ -319,24 +331,17 @@ def nhl_scrape_schedule(season:int, start:str = '', end:str = ''):
|
|
319
331
|
if gameWeek.empty:
|
320
332
|
game.append(gameWeek)
|
321
333
|
else:
|
322
|
-
gameWeek['
|
323
|
-
|
324
|
-
gameWeek['
|
325
|
-
gameWeek['away_team_abbr'] = gameWeek['awayTeam.abbrev']
|
326
|
-
gameWeek['home_team_abbr'] = gameWeek['homeTeam.abbrev']
|
327
|
-
gameWeek['game_title'] = gameWeek['away_team_abbr'] + " @ " + gameWeek['home_team_abbr'] + " - " + gameWeek['date']
|
328
|
-
gameWeek['estStartTime'] = pd.to_datetime(gameWeek['startTimeUTC']).dt.tz_convert('US/Eastern').dt.strftime("%I:%M %p")
|
329
|
-
|
330
|
-
front_col = ['id','season','date','season_type','game_title','away_team_abbr','home_team_abbr','estStartTime']
|
331
|
-
gameWeek = gameWeek[front_col+[col for col in gameWeek.columns.to_list() if col not in front_col]]
|
334
|
+
gameWeek['game_date'] = get['gameWeek'][0]['date']
|
335
|
+
gameWeek['game_title'] = gameWeek['awayTeam.abbrev'] + " @ " + gameWeek['homeTeam.abbrev'] + " - " + gameWeek['game_date']
|
336
|
+
gameWeek['start_time_est'] = pd.to_datetime(gameWeek['startTimeUTC']).dt.tz_convert('US/Eastern').dt.strftime("%I:%M %p")
|
332
337
|
|
333
338
|
game.append(gameWeek)
|
334
339
|
|
335
|
-
#Concatenate all games
|
336
|
-
df = pd.concat(game)
|
340
|
+
#Concatenate all games and standardize column naming
|
341
|
+
df = pd.concat(game).rename(columns=COL_MAP['schedule'],errors='ignore')
|
337
342
|
|
338
343
|
#Return: specified schedule data
|
339
|
-
return df
|
344
|
+
return df[[col for col in COL_MAP['schedule'].values() if col in df.columns]]
|
340
345
|
|
341
346
|
def nhl_scrape_season(season:int, split_shifts:bool = False, season_types:list[int] = [2,3], remove:list[str] = [], start:str = '', end:str = '', local:bool=False, local_path:str = SCHEDULE_PATH, verbose:bool = False, sources:bool = False, errors:bool = False):
|
342
347
|
"""
|
@@ -399,13 +404,13 @@ def nhl_scrape_season(season:int, split_shifts:bool = False, season_types:list[i
|
|
399
404
|
|
400
405
|
load = load.loc[(load['season']==season)&
|
401
406
|
(load['season_type'].isin(season_types))&
|
402
|
-
(load['
|
407
|
+
(load['game_date']>=start)&(load['game_date']<=end)]
|
403
408
|
|
404
|
-
game_ids = load['
|
409
|
+
game_ids = load['game_id'].to_list()
|
405
410
|
else:
|
406
411
|
load = nhl_scrape_schedule(season,start,end)
|
407
412
|
load = load.loc[(load['season']==season)&(load['season_type'].isin(season_types))]
|
408
|
-
game_ids = load['
|
413
|
+
game_ids = load['game_id'].to_list()
|
409
414
|
|
410
415
|
#If no games found, terminate the process
|
411
416
|
if not game_ids:
|
@@ -440,10 +445,9 @@ def nhl_scrape_seasons_info(seasons:list[int] = []):
|
|
440
445
|
A DataFrame containing the information for requested seasons.
|
441
446
|
"""
|
442
447
|
|
443
|
-
#
|
444
|
-
# param 'season' - list of seasons to include
|
445
|
-
|
446
448
|
print(f'Scraping info for seasons: {seasons}')
|
449
|
+
|
450
|
+
#Load two different data sources: general season info and standings data related to season
|
447
451
|
api = "https://api.nhle.com/stats/rest/en/season"
|
448
452
|
info = "https://api-web.nhle.com/v1/standings-season"
|
449
453
|
data = rs.get(api).json()['data']
|
@@ -452,21 +456,26 @@ def nhl_scrape_seasons_info(seasons:list[int] = []):
|
|
452
456
|
df = pd.json_normalize(data)
|
453
457
|
df_2 = pd.json_normalize(data_2)
|
454
458
|
|
455
|
-
|
459
|
+
#Remove common columns
|
460
|
+
df_2 = df_2.drop(columns=['conferencesInUse', 'divisionsInUse', 'pointForOTlossInUse','rowInUse','tiesInUse','wildcardInUse'])
|
456
461
|
|
462
|
+
df = pd.merge(df,df_2,how='outer',on=['id']).rename(columns=COL_MAP['season_info'])
|
463
|
+
|
464
|
+
df = df[[col for col in COL_MAP['season_info'].values() if col in df.columns]]
|
465
|
+
|
457
466
|
if len(seasons) > 0:
|
458
|
-
return df.loc[df['
|
467
|
+
return df.loc[df['season'].isin(seasons)].sort_values(by=['season'])
|
459
468
|
else:
|
460
|
-
return df.sort_values(by=['
|
469
|
+
return df.sort_values(by=['season'])
|
461
470
|
|
462
|
-
def nhl_scrape_standings(arg:
|
471
|
+
def nhl_scrape_standings(arg:int | list[int] | Literal['now'] = 'now', season_type:int = 2):
|
463
472
|
"""
|
464
473
|
Returns standings or playoff bracket
|
465
474
|
Args:
|
466
|
-
arg (
|
467
|
-
Date formatted as 'YYYY-MM-DD' to scrape standings, NHL season such as "20242025", or 'now' for current standings. Default is 'now'.
|
475
|
+
arg (int or list[int] or str, optional):
|
476
|
+
Date formatted as 'YYYY-MM-DD' to scrape standings, NHL season such as "20242025", list of NHL seasons, or 'now' for current standings. Default is 'now'.
|
468
477
|
season_type (int, optional):
|
469
|
-
Part of season to scrape. If 3 (playoffs) then scrape the playoff bracket for the season implied by arg. When arg = 'now' this is
|
478
|
+
Part of season to scrape. If 3 (playoffs) then scrape the playoff bracket for the season implied by arg. When arg = 'now' this is defaulted to the most recent playoff year. Any dates passed through are parsed as seasons. Default is 2.
|
470
479
|
|
471
480
|
Returns:
|
472
481
|
pd.DataFrame:
|
@@ -475,31 +484,62 @@ def nhl_scrape_standings(arg:str | int = "now", season_type:int = 2):
|
|
475
484
|
|
476
485
|
if season_type == 3:
|
477
486
|
if arg == "now":
|
478
|
-
arg = NEW
|
487
|
+
arg = [NEW]
|
488
|
+
elif type(arg) == int:
|
489
|
+
#Find year from season
|
490
|
+
arg = [str(arg)[4:8]]
|
491
|
+
elif type(arg) == list:
|
492
|
+
#Find year from seasons
|
493
|
+
arg = [str(s)[4:8] for s in arg]
|
494
|
+
else:
|
495
|
+
#Find the season's ending year from the date (October-December dates map to the following year)
|
496
|
+
arg = [int(arg[0:4])+1 if (9 < int(arg[5:7]) < 13) else int(arg[0:4])]
|
479
497
|
|
480
|
-
print(f"Scraping playoff bracket for
|
481
|
-
|
498
|
+
print(f"Scraping playoff bracket for season{'s' if len(arg)>1 else ''}: {arg}")
|
499
|
+
|
500
|
+
dfs = []
|
501
|
+
for season in arg:
|
502
|
+
api = f"https://api-web.nhle.com/v1/playoff-bracket/{season}"
|
482
503
|
|
483
|
-
|
504
|
+
data = rs.get(api).json()['series']
|
505
|
+
dfs.append(pd.json_normalize(data))
|
484
506
|
|
485
|
-
|
507
|
+
#Return: playoff bracket
|
508
|
+
return pd.concat(dfs)
|
486
509
|
|
487
510
|
else:
|
488
511
|
if arg == "now":
|
489
512
|
print("Scraping standings as of now...")
|
513
|
+
arg = [arg]
|
490
514
|
elif arg in SEASONS:
|
491
515
|
print(f'Scraping standings for season: {arg}')
|
516
|
+
arg = [arg]
|
517
|
+
elif type(arg) == list:
|
518
|
+
print(f'Scraping standings for seasons: {arg}')
|
492
519
|
else:
|
493
520
|
print(f"Scraping standings for date: {arg}")
|
521
|
+
arg = [arg]
|
522
|
+
|
523
|
+
dfs = []
|
524
|
+
for search in arg:
|
525
|
+
#If the search value is an int then it's a season; otherwise it is either 'now' or a date as a string
|
526
|
+
if type(search) == int:
|
527
|
+
season_data = rs.get('https://api.nhle.com/stats/rest/en/season').json()['data']
|
528
|
+
season_data = [s for s in season_data if s['id'] == search][0]
|
529
|
+
end = season_data['regularSeasonEndDate'][0:10]
|
530
|
+
else:
|
531
|
+
end = search
|
532
|
+
|
533
|
+
api = f"https://api-web.nhle.com/v1/standings/{end}"
|
494
534
|
|
495
|
-
|
496
|
-
|
497
|
-
end = season_data['regularSeasonEndDate'][0:10]
|
535
|
+
data = rs.get(api).json()['standings']
|
536
|
+
dfs.append(pd.json_normalize(data))
|
498
537
|
|
499
|
-
|
500
|
-
|
538
|
+
#Standardize columns
|
539
|
+
df = pd.concat(dfs).rename(columns=COL_MAP['standings'])
|
501
540
|
|
502
|
-
|
541
|
+
#Return: standings data
|
542
|
+
return df[[col for col in COL_MAP['standings'].values() if col in df.columns]]
|
503
543
|
|
504
544
|
def nhl_scrape_roster(season: int):
|
505
545
|
"""
|
@@ -514,33 +554,40 @@ def nhl_scrape_roster(season: int):
|
|
514
554
|
A DataFrame containing the rosters for all teams in the specified season.
|
515
555
|
"""
|
516
556
|
|
517
|
-
print(
|
557
|
+
print(f'Scrpaing rosters for the {season} season...')
|
518
558
|
teaminfo = pd.read_csv(info_path)
|
519
559
|
|
520
560
|
rosts = []
|
521
|
-
for team in
|
561
|
+
for team in teaminfo['team_abbr'].drop_duplicates():
|
522
562
|
try:
|
523
|
-
print(
|
524
|
-
api =
|
563
|
+
print(f'Scraping {team} roster...')
|
564
|
+
api = f'https://api-web.nhle.com/v1/roster/{team}/{season}'
|
525
565
|
|
526
566
|
data = rs.get(api).json()
|
527
567
|
forwards = pd.json_normalize(data['forwards'])
|
528
|
-
forwards['
|
568
|
+
forwards['heading_position'] = "F"
|
529
569
|
dmen = pd.json_normalize(data['defensemen'])
|
530
|
-
dmen['
|
570
|
+
dmen['heading_position'] = "D"
|
531
571
|
goalies = pd.json_normalize(data['goalies'])
|
532
|
-
goalies['
|
572
|
+
goalies['heading_position'] = "G"
|
533
573
|
|
534
574
|
roster = pd.concat([forwards,dmen,goalies]).reset_index(drop=True)
|
535
|
-
roster['
|
575
|
+
roster['player_name'] = (roster['firstName.default']+" "+roster['lastName.default']).str.upper()
|
536
576
|
roster['season'] = str(season)
|
537
577
|
roster['team_abbr'] = team
|
538
578
|
|
539
579
|
rosts.append(roster)
|
540
580
|
except:
|
541
|
-
print(
|
581
|
+
print(f'No roster found for {team}...')
|
582
|
+
|
583
|
+
#Combine rosters
|
584
|
+
df = pd.concat(rosts)
|
542
585
|
|
543
|
-
|
586
|
+
#Standardize columns
|
587
|
+
df = df.rename(columns=COL_MAP['roster'])
|
588
|
+
|
589
|
+
#Return: roster data for provided season
|
590
|
+
return df[[col for col in COL_MAP['roster'].values() if col in df.columns]]
|
544
591
|
|
545
592
|
def nhl_scrape_prospects(team:str):
|
546
593
|
"""
|
@@ -558,16 +605,21 @@ def nhl_scrape_prospects(team:str):
|
|
558
605
|
api = f'https://api-web.nhle.com/v1/prospects/{team}'
|
559
606
|
|
560
607
|
data = rs.get(api).json()
|
561
|
-
|
608
|
+
|
609
|
+
print(f'Scraping {team} prospects...')
|
610
|
+
|
562
611
|
#Iterate through positions
|
563
612
|
players = [pd.json_normalize(data[pos]) for pos in ['forwards','defensemen','goalies']]
|
564
613
|
|
565
614
|
prospects = pd.concat(players)
|
566
615
|
#Add name columns
|
567
|
-
prospects['
|
616
|
+
prospects['player_name'] = (prospects['firstName.default']+" "+prospects['lastName.default']).str.upper()
|
568
617
|
|
618
|
+
#Standardize columns
|
619
|
+
prospects = prospects.rename(columns=COL_MAP['prospects'])
|
620
|
+
|
569
621
|
#Return: team prospects
|
570
|
-
return prospects
|
622
|
+
return prospects[[col for col in COL_MAP['prospects'].values() if col in prospects.columns]]
|
571
623
|
|
572
624
|
def nhl_scrape_team_info(country:bool = False):
|
573
625
|
"""
|
@@ -592,9 +644,13 @@ def nhl_scrape_team_info(country:bool = False):
|
|
592
644
|
data['logo_light'] = 'https://assets.nhle.com/logos/nhl/svg/'+data['triCode']+'_light.svg'
|
593
645
|
data['logo_dark'] = 'https://assets.nhle.com/logos/nhl/svg/'+data['triCode']+'_dark.svg'
|
594
646
|
|
595
|
-
|
647
|
+
#Standardize columns
|
648
|
+
data = data.rename(columns=COL_MAP['team_info'])
|
649
|
+
|
650
|
+
#Return: team or country info
|
651
|
+
return data[[col for col in COL_MAP['team_info'].values() if col in data.columns]].sort_values(by=(['country_abbr','country_name'] if country else ['team_abbr','team_name']))
|
596
652
|
|
597
|
-
def
|
653
|
+
def nhl_scrape_player_info(player_ids:list[int]):
|
598
654
|
"""
|
599
655
|
Returns player data for specified players.
|
600
656
|
|
@@ -607,6 +663,7 @@ def nhl_scrape_player_data(player_ids:list[int]):
|
|
607
663
|
A DataFrame containing player data for specified players.
|
608
664
|
"""
|
609
665
|
|
666
|
+
print(f'Retreiving player information for {player_ids}...')
|
610
667
|
infos = []
|
611
668
|
for player_id in player_ids:
|
612
669
|
player_id = int(player_id)
|
@@ -614,7 +671,7 @@ def nhl_scrape_player_data(player_ids:list[int]):
|
|
614
671
|
|
615
672
|
data = pd.json_normalize(rs.get(api).json())
|
616
673
|
#Add name column
|
617
|
-
data['
|
674
|
+
data['player_name'] = (data['firstName.default'] + " " + data['lastName.default']).str.upper()
|
618
675
|
|
619
676
|
#Append
|
620
677
|
infos.append(data)
|
@@ -622,12 +679,15 @@ def nhl_scrape_player_data(player_ids:list[int]):
|
|
622
679
|
if infos:
|
623
680
|
df = pd.concat(infos)
|
624
681
|
|
682
|
+
#Standardize columns
|
683
|
+
df = df.rename(columns=COL_MAP['player_info'])
|
684
|
+
|
625
685
|
#Return: player data
|
626
|
-
return df
|
686
|
+
return df[[col for col in COL_MAP['player_info'].values() if col in df.columns]]
|
627
687
|
else:
|
628
688
|
return pd.DataFrame()
|
629
689
|
|
630
|
-
def nhl_scrape_draft_rankings(arg:str = 'now', category:int = 0):
|
690
|
+
def nhl_scrape_draft_rankings(arg:str | Literal['now'] = 'now', category:int = 0):
|
631
691
|
"""
|
632
692
|
Returns draft rankings
|
633
693
|
Args:
|
@@ -638,8 +698,8 @@ def nhl_scrape_draft_rankings(arg:str = 'now', category:int = 0):
|
|
638
698
|
|
639
699
|
- Category 1 is North American Skaters.
|
640
700
|
- Category 2 is International Skaters.
|
641
|
-
- Category 3 is North American
|
642
|
-
- Category 4 is International
|
701
|
+
- Category 3 is North American Goalies.
|
702
|
+
- Category 4 is International Goalies.
|
643
703
|
|
644
704
|
Default is 0 (all prospects).
|
645
705
|
Returns:
|
@@ -647,15 +707,26 @@ def nhl_scrape_draft_rankings(arg:str = 'now', category:int = 0):
|
|
647
707
|
A DataFrame containing draft rankings.
|
648
708
|
"""
|
649
709
|
|
710
|
+
print(f'Scraping draft rankings for {arg}...\nCategory: {DRAFT_CAT[category]}...')
|
711
|
+
|
650
712
|
#Player category only applies when requesting a specific season
|
651
713
|
api = f"https://api-web.nhle.com/v1/draft/rankings/{arg}/{category}" if category > 0 else f"https://api-web.nhle.com/v1/draft/rankings/{arg}"
|
652
714
|
data = pd.json_normalize(rs.get(api).json()['rankings'])
|
653
715
|
|
654
716
|
#Add player name columns
|
655
|
-
data['
|
717
|
+
data['player_name'] = (data['firstName']+" "+data['lastName']).str.upper()
|
718
|
+
|
719
|
+
#Fix positions
|
720
|
+
data['positionCode'] = data['positionCode'].replace({
|
721
|
+
'LW':'L',
|
722
|
+
'RW':'R'
|
723
|
+
})
|
724
|
+
|
725
|
+
#Standardize columns
|
726
|
+
data = data.rename(columns=COL_MAP['draft_rankings'])
|
656
727
|
|
657
728
|
#Return: prospect rankings
|
658
|
-
return data
|
729
|
+
return data[[col for col in COL_MAP['draft_rankings'].values() if col in data.columns]]
|
659
730
|
|
660
731
|
def nhl_scrape_game_info(game_ids:list[int]):
|
661
732
|
"""
|
@@ -670,6 +741,9 @@ def nhl_scrape_game_info(game_ids:list[int]):
|
|
670
741
|
A DataFrame containing information for each game.
|
671
742
|
"""
|
672
743
|
|
744
|
+
#Wrap game_id in a list if only a single game_id is provided
|
745
|
+
game_ids = [game_ids] if type(game_ids) != list else game_ids
|
746
|
+
|
673
747
|
print(f'Finding game information for games: {game_ids}')
|
674
748
|
|
675
749
|
link = 'https://api-web.nhle.com/v1/gamecenter'
|
@@ -678,18 +752,15 @@ def nhl_scrape_game_info(game_ids:list[int]):
|
|
678
752
|
df = pd.concat([pd.json_normalize(rs.get(f'{link}/{game_id}/landing').json()) for game_id in game_ids])
|
679
753
|
|
680
754
|
#Add extra info
|
681
|
-
df['
|
682
|
-
df['
|
683
|
-
df['
|
684
|
-
df['home_team_abbr'] = df['homeTeam.abbrev']
|
685
|
-
df['game_title'] = df['away_team_abbr'] + " @ " + df['home_team_abbr'] + " - " + df['date']
|
686
|
-
df['estStartTime'] = pd.to_datetime(df['startTimeUTC']).dt.tz_convert('US/Eastern').dt.strftime("%I:%M %p")
|
755
|
+
df['game_date'] = df['gameDate']
|
756
|
+
df['game_title'] = df['awayTeam.abbrev'] + " @ " + df['homeTeam.abbrev'] + " - " + df['game_date']
|
757
|
+
df['start_time_est'] = pd.to_datetime(df['startTimeUTC']).dt.tz_convert('US/Eastern').dt.strftime("%I:%M %p")
|
687
758
|
|
688
|
-
|
689
|
-
df = df
|
759
|
+
#Standardize columns
|
760
|
+
df = df.rename(columns=COL_MAP['schedule'])
|
690
761
|
|
691
762
|
#Return: game information
|
692
|
-
return df
|
763
|
+
return df[[col for col in COL_MAP['schedule'].values() if col in df.columns]]
|
693
764
|
|
694
765
|
|
695
766
|
def nhl_apply_xG(pbp: pd.DataFrame):
|
@@ -1144,23 +1215,23 @@ def nhl_calculate_stats(pbp:pd.DataFrame, type:Literal['skater','goalie','team']
|
|
1144
1215
|
|
1145
1216
|
#Import rosters and player info
|
1146
1217
|
rosters = pd.read_csv(roster_path)
|
1147
|
-
names = rosters[['
|
1148
|
-
'headshot','
|
1149
|
-
'
|
1150
|
-
'
|
1218
|
+
names = rosters[['player_id','player_name',
|
1219
|
+
'headshot','position','handedness',
|
1220
|
+
'height_in','weight_lbs',
|
1221
|
+
'birth_date','birth_country']].drop_duplicates(subset=['player_id','player_name'],keep='last')
|
1151
1222
|
|
1152
1223
|
#Add names
|
1153
|
-
complete = pd.merge(complete,names,how='left',left_on='ID',right_on='
|
1224
|
+
complete = pd.merge(complete,names,how='left',left_on='ID',right_on='player_id')
|
1154
1225
|
|
1155
1226
|
#Rename if there are no missing names
|
1156
|
-
complete = complete.rename(columns={'
|
1227
|
+
complete = complete.rename(columns={'player_name':'Goalie',
|
1157
1228
|
'headshot':'Headshot',
|
1158
|
-
'
|
1159
|
-
'
|
1160
|
-
'
|
1161
|
-
'
|
1162
|
-
'
|
1163
|
-
'
|
1229
|
+
'position':'Position',
|
1230
|
+
'handedness':'Handedness',
|
1231
|
+
'height_in':'Height (in)',
|
1232
|
+
'weight_lbs':'Weight (lbs)',
|
1233
|
+
'birth_date':'Birthday',
|
1234
|
+
'birth_country':'Nationality'})
|
1164
1235
|
|
1165
1236
|
#WSBA
|
1166
1237
|
complete['WSBA'] = complete['ID'].astype(str).str.replace('.0','')+complete['Team']+complete['Season'].astype(str)
|
@@ -1272,23 +1343,23 @@ def nhl_calculate_stats(pbp:pd.DataFrame, type:Literal['skater','goalie','team']
|
|
1272
1343
|
|
1273
1344
|
#Import rosters and player info
|
1274
1345
|
rosters = pd.read_csv(roster_path)
|
1275
|
-
names = rosters[['
|
1276
|
-
'headshot','
|
1277
|
-
'
|
1278
|
-
'
|
1346
|
+
names = rosters[['player_id','player_name',
|
1347
|
+
'headshot','position','handedness',
|
1348
|
+
'height_in','weight_lbs',
|
1349
|
+
'birth_date','birth_country']].drop_duplicates(subset=['player_id','player_name'],keep='last')
|
1279
1350
|
|
1280
1351
|
#Add names
|
1281
|
-
complete = pd.merge(complete,names,how='left',left_on='ID',right_on='
|
1352
|
+
complete = pd.merge(complete,names,how='left',left_on='ID',right_on='player_id')
|
1282
1353
|
|
1283
1354
|
#Rename if there are no missing names
|
1284
|
-
complete = complete.rename(columns={'
|
1355
|
+
complete = complete.rename(columns={'player_name':'Player',
|
1285
1356
|
'headshot':'Headshot',
|
1286
|
-
'
|
1287
|
-
'
|
1288
|
-
'
|
1289
|
-
'
|
1290
|
-
'
|
1291
|
-
'
|
1357
|
+
'position':'Position',
|
1358
|
+
'handedness':'Handedness',
|
1359
|
+
'height_in':'Height (in)',
|
1360
|
+
'weight_lbs':'Weight (lbs)',
|
1361
|
+
'birth_date':'Birthday',
|
1362
|
+
'birth_country':'Nationality'})
|
1292
1363
|
|
1293
1364
|
#Set TOI to minute
|
1294
1365
|
complete['TOI'] = complete['TOI']/60
|
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.4
|
2
2
|
Name: wsba_hockey
|
3
|
-
Version: 1.2.
|
3
|
+
Version: 1.2.3
|
4
4
|
Summary: WeakSide Breakout's complete Python package of access to hockey data, primarily including the scraping of National Hockey League schedule, play-by-play, and shifts information.
|
5
5
|
Author-email: Owen Singh <owenbksingh@gmail.com>
|
6
6
|
Project-URL: Homepage, https://github.com/owensingh38/wsba_hockey/
|
@@ -9,6 +9,8 @@ src/wsba_hockey.egg-info/dependency_links.txt
|
|
9
9
|
src/wsba_hockey.egg-info/top_level.txt
|
10
10
|
src/wsba_hockey/tools/__init__.py
|
11
11
|
src/wsba_hockey/tools/agg.py
|
12
|
+
src/wsba_hockey/tools/columns.py
|
13
|
+
src/wsba_hockey/tools/game_pred.py
|
12
14
|
src/wsba_hockey/tools/plotting.py
|
13
15
|
src/wsba_hockey/tools/scraping.py
|
14
16
|
src/wsba_hockey/tools/xg_model.py
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|