wsba-hockey 1.2.2__py3-none-any.whl → 1.2.4__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
wsba_hockey/__init__.py CHANGED
@@ -7,6 +7,7 @@ from wsba_hockey.wsba_main import (
7
7
  nhl_scrape_roster,
8
8
  nhl_scrape_draft_rankings,
9
9
  nhl_scrape_prospects,
10
+ nhl_scrape_player_info,
10
11
  nhl_scrape_team_info,
11
12
  nhl_scrape_game_info,
12
13
  nhl_calculate_stats,
@@ -0,0 +1,311 @@
1
+ ### COLUMN NAMES ###
2
+ # Provided in this file are dicts to standardize column names as necessary #
3
+
4
def col_map() -> dict[str, dict[str, str]]:
    """
    Return the column-name standardization maps used across the package.

    Returns:
        dict[str, dict[str, str]]:
            A new dict on every call, keyed by dataset name ('schedule',
            'season_info', 'standings', 'roster', 'prospects', 'player_info',
            'team_info', 'draft_rankings').  Each value maps a source column
            name to its standardized column name.

    Note:
        Entry order is significant: callers pass a map to
        ``DataFrame.rename(columns=...)`` and then select the output columns by
        iterating over the map's ``.values()``, so the insertion order below
        is also the output column order.
    """
    return {
        #Schedule and game-info columns
        'schedule': {
            'season': 'season',
            'id': 'game_id',
            'game_title': 'game_title',
            'game_date': 'game_date',
            'gameType': 'season_type',
            'neutralSite': 'neutral_site',
            'startTimeUTC': 'start_time_utc',
            'start_time_est': 'start_time_est',
            'easternUTCOffset': 'eastern_utc',
            'venueUTCOffset': 'venue_utc',
            'venueTimezone': 'venue_timezone',
            'gameState': 'game_state',
            'gameScheduleState': 'game_schedule_state',
            'tvBroadcasts': 'tv_broadcasts',
            'threeMinRecap': 'three_min_recap',
            'threeMinRecapFr': 'three_min_recap_fr',
            'condensedGame': 'condensed_game',
            'condensedGameFr': 'condensed_game_fr',
            'gameCenterLink': 'gamecenter_link',
            'venue.default': 'venue',
            'awayTeam.id': 'away_team_id',
            'homeTeam.id': 'home_team_id',
            'awayTeam.abbrev': 'away_team_abbr',
            'homeTeam.abbrev': 'home_team_abbr',
            'awayTeam.darkLogo': 'away_team_logo',
            'homeTeam.darkLogo': 'home_team_logo',
            'awayTeam.awaySplitSquad': 'away_team_split_squad',
            'homeTeam.homeSplitSquad': 'home_team_split_squad',
            'awayTeam.score': 'away_score',
            'homeTeam.score': 'home_score',
            'periodDescriptor.number': 'period',
            'periodDescriptor.maxRegulationPeriods': 'period_max_regulation',
            'periodDescriptor.periodType': 'period_type',
            'gameOutcome.lastPeriodType': 'period_type_last',
            'seriesUrl': 'series_url',
            'seriesStatus.round': 'series_round',
            'seriesStatus.seriesAbbrev': 'series_abbr',
            'seriesStatus.seriesTitle': 'series_name',
            'seriesStatus.seriesLetter': 'series_letter',
            'seriesStatus.neededToWin': 'series_games_needed_to_win',
            'seriesStatus.gameNumberOfSeries': 'series_game_num',
            'seriesStatus.topSeedTeamAbbrev': 'top_seed_team_abbr',
            'seriesStatus.bottomSeedTeamAbbrev': 'bottom_seed_team_abbr',
            'seriesStatus.topSeedWins': 'top_seed_wins',
            'seriesStatus.bottomSeedWins': 'bottom_seed_wins',
        },
        #Season information columns
        'season_info': {
            'id': 'season',
            'formattedSeasonId': 'season_name',
            'seasonOrdinal': 'season_ordinal',
            'startDate': 'start_date',
            'endDate': 'end_date',
            'numberOfGames': 'games_num',
            'totalRegularSeasonGames': 'regular_season_total_games_num',
            'totalPlayoffGames': 'playoffs_total_games_num',
            'regularSeasonStartDate': 'regular_season_start_date',
            'regularSeasonEndDate': 'regular_season_end_date',
            'standingsStart': 'standings_start_date',
            'standingsEnd': 'standings_end_date',
            'allStarGameInUse': 'all_star_game',
            'conferencesInUse': 'conferences',
            'divisionsInUse': 'divisions',
            'wildcardInUse': 'wildcard',
            'entryDraftInUse': 'entry_draft',
            'nhlStanleyCupOwner': 'nhl_stanley_cup_owner',
            'olympicsParticipation': 'olympic_participation',
            'tiesInUse': 'ties',
            'pointForOTLossInUse': 'ot_loss_point',
            'regulationWinsInUse': 'reg_wins',
            'rowInUse': 'row',
            'supplementalDraftInUse': 'supplemental_draft',
        },
        #Standings and playoff bracket columns
        'standings': {
            'clinchIndicator': 'clinch_indicator',
            'conferenceAbbrev': 'conference_abbr',
            'conferenceHomeSequence': 'conference_home_sequence',
            'conferenceL10Sequence': 'conference_l10_sequence',
            'conferenceName': 'conference_name',
            'conferenceRoadSequence': 'conference_road_sequence',
            'conferenceSequence': 'conference_sequence',
            'date': 'date',
            'divisionAbbrev': 'division_abbr',
            'divisionHomeSequence': 'division_home_sequence',
            'divisionL10Sequence': 'division_l10_sequence',
            'divisionName': 'division_name',
            'divisionRoadSequence': 'division_road_sequence',
            'divisionSequence': 'division_sequence',
            'gameTypeId': 'game_type_id',
            'gamesPlayed': 'GP',
            'goalDifferential': 'GD',
            'goalDifferentialPctg': 'GD%',
            'goalAgainst': 'GA',
            'goalFor': 'GF',
            'goalsForPctg': 'GF%',
            'homeGamesPlayed': 'HGP',
            'homeGoalDifferential': 'HGD',
            'homeGoalsAgainst': 'HGA',
            'homeGoalsFor': 'HGF',
            'homeLosses': 'HL',
            #NOTE(review): road and l10 use 'AOTL' / 'l10_OTL'; 'HOL' may be
            #intended as 'HOTL' - left unchanged to keep output columns stable
            'homeOtLosses': 'HOL',
            'homePoints': 'HPTS',
            'homeRegulationPlusOtWins': 'HROW',
            'homeRegulationWins': 'HRW',
            'homeTies': 'HT',
            'homeWins': 'HW',
            'l10GamesPlayed': 'l10_GP',
            'l10GoalDifferential': 'l10_GD',
            'l10GoalsAgainst': 'l10_GA',
            'l10GoalsFor': 'l10_GF',
            'l10Losses': 'l10_L',
            'l10OtLosses': 'l10_OTL',
            'l10Points': 'l10_PTS',
            'l10RegulationPlusOtWins': 'l10_ROW',
            'l10RegulationWins': 'l10_RW',
            'l10Ties': 'l10_T',
            'l10Wins': 'l10_W',
            'leagueHomeSequence': 'league_home_sequence',
            'leagueL10Sequence': 'league_l10_sequence',
            'leagueRoadSequence': 'league_road_sequence',
            'leagueSequence': 'league_sequence',
            'losses': 'L',
            'otLosses': 'OTL',
            'pointPctg': 'PTS%',
            'points': 'PTS',
            'regulationPlusOtWinPctg': 'ROW%',
            'regulationPlusOtWins': 'ROW',
            'regulationWinPctg': 'RW%',
            'regulationWins': 'RW',
            'roadGamesPlayed': 'AGP',
            'roadGoalDifferential': 'AGD',
            'roadGoalsAgainst': 'AGA',
            'roadGoalsFor': 'AGF',
            'roadLosses': 'AL',
            'roadOtLosses': 'AOTL',
            'roadPoints': 'APTS',
            'roadRegulationPlusOtWins': 'AROW',
            'roadRegulationWins': 'ARW',
            'roadTies': 'AT',
            'roadWins': 'AW',
            'seasonId': 'season',
            'shootoutLosses': 'SOL',
            'shootoutWins': 'SOW',
            'streakCode': 'streak_code',
            'streakCount': 'streak_count',
            'teamLogo': 'team_logo',
            'ties': 'T',
            'waiversSequence': 'waivers_sequence',
            'wildcardSequence': 'wildcard_sequence',
            'winPctg': 'W%',
            'wins': 'W',
            'placeName.default': 'place_name',
            'teamName.default': 'team_name',
            'teamCommonName.default': 'team_common_name',
            'teamAbbrev.default': 'team_abbr',
            'seriesUrl': 'series_url',
            'seriesTitle': 'series_title',
            'seriesAbbrev': 'series_abbrev',
            'seriesLetter': 'series_letter',
            'playoffRound': 'playoff_round',
            'topSeedRank': 'top_seed_rank',
            'topSeedRankAbbrev': 'top_seed_rank_abbr',
            'topSeedWins': 'top_seed_wins',
            'bottomSeedRank': 'bottom_seed_rank',
            'bottomSeedRankAbbrev': 'bottom_seed_rank_abbr',
            'bottomSeedWins': 'bottom_seed_wins',
            'winningTeamId': 'winning_team_id',
            'losingTeamId': 'losing_team_id',
            'topSeedTeam.id': 'top_seed_team_id',
            'topSeedTeam.abbrev': 'top_seed_team_abbr',
            'topSeedTeam.name.default': 'top_seed_team_name',
            'topSeedTeam.commonName.default': 'top_seed_team_common_name',
            'topSeedTeam.placeNameWithPreposition.default': 'top_seed_team_place_name',
            'topSeedTeam.logo': 'top_seed_team_logo',
            'topSeedTeam.darkLogo': 'top_seed_team_dark_logo',
            'bottomSeedTeam.id': 'bottom_seed_team_id',
            'bottomSeedTeam.abbrev': 'bottom_seed_team_abbr',
            'bottomSeedTeam.name.default': 'bottom_seed_team_name',
            'bottomSeedTeam.commonName.default': 'bottom_seed_team_common_name',
            'bottomSeedTeam.placeNameWithPreposition.default': 'bottom_seed_team_place_name',
            'bottomSeedTeam.logo': 'bottom_seed_team_logo',
            'bottomSeedTeam.darkLogo': 'bottom_seed_team_dark_logo',
            'seriesLogo': 'series_logo',
        },
        #Team roster columns
        'roster': {
            'id': 'player_id',
            'team_abbr': 'team_abbr',
            'season': 'season',
            'headshot': 'headshot',
            'sweaterNumber': 'sweater_number',
            'positionCode': 'position',
            'heading_position': 'heading_position',
            'shootsCatches': 'handedness',
            'heightInInches': 'height_in',
            'weightInPounds': 'weight_lbs',
            'heightInCentimeters': 'height_cm',
            'weightInKilograms': 'weight_kg',
            'birthDate': 'birth_date',
            'birthCountry': 'birth_country',
            'player_name': 'player_name',
            'firstName.default': 'player_first_name',
            'lastName.default': 'player_last_name',
            'birthCity.default': 'birth_city',
            'birthStateProvince.default': 'birth_state_province',
        },
        #Team prospects columns
        'prospects': {
            'id': 'player_id',
            'headshot': 'headshot',
            'sweaterNumber': 'sweater_number',
            'positionCode': 'position',
            'shootsCatches': 'handedness',
            'heightInInches': 'height_in',
            'weightInPounds': 'weight_lbs',
            'heightInCentimeters': 'height_cm',
            'weightInKilograms': 'weight_kg',
            'birthDate': 'birth_date',
            'birthCountry': 'birth_country',
            'player_name': 'player_name',
            'firstName.default': 'player_first_name',
            'lastName.default': 'player_last_name',
            'birthCity.default': 'birth_city',
            'birthStateProvince.default': 'birth_state_province',
        },
        #Individual player information columns
        'player_info': {
            'playerId': 'player_id',
            'player_name': 'player_name',
            'isActive': 'is_active',
            'currentTeamId': 'current_team_id',
            'currentTeamAbbrev': 'current_team_abbr',
            'badges': 'badges',
            'teamLogo': 'team_logo',
            'sweaterNumber': 'sweater_number',
            'position': 'position',
            'headshot': 'headshot',
            'heroImage': 'hero_image',
            'heightInInches': 'height_in',
            'heightInCentimeters': 'height_cm',
            'weightInPounds': 'weight_lbs',
            'weightInKilograms': 'weight_kg',
            'birthDate': 'birth_date',
            'birthCountry': 'birth_country',
            'shootsCatches': 'handedness',
            'playerSlug': 'player_slug',
            'inTop100AllTime': 'in_top_100_all_time',
            'inHHOF': 'in_hhof',
            'shopLink': 'shop_link',
            'twitterLink': 'twitter_link',
            'watchLink': 'watch_link',
            'last5Games': 'last_5_games',
            'seasonTotals': 'season_totals',
            'awards': 'awards',
            'currentTeamRoster': 'current_team_roster',
            'fullTeamName.default': 'full_team_name',
            'teamCommonName.default': 'team_common_name',
            'teamPlaceNameWithPreposition.default': 'team_place_name_with_preposition',
            'firstName.default': 'player_first_name',
            'lastName.default': 'player_last_name',
            'birthCity.default': 'birth_city',
            'birthStateProvince.default': 'birth_state_province',
            'draftDetails.year': 'draft_year',
            'draftDetails.teamAbbrev': 'draft_team_abbr',
            'draftDetails.round': 'draft_round',
            'draftDetails.pickInRound': 'draft_pick_in_round',
            'draftDetails.overallPick': 'draft_overall_pick',
        },
        #Team and country information columns
        'team_info': {
            'id': 'team_id',
            'franchiseId': 'franchise_id',
            'fullName': 'team_name',
            'leagueId': 'league_id',
            'triCode': 'team_abbr',
            'logo_light': 'logo_light',
            'logo_dark': 'logo_dark',
            'country3Code': 'country_abbr',
            'countryCode': 'country_abbr_2',
            'countryName': 'country_name',
            'hasPlayerStats': 'has_player_stats',
            'imageUrl': 'country_flag_large',
            'isActive': 'is_active',
            'nationalityName': 'nationality',
            'olympicUrl': 'olympic_url',
            'thumbnailUrl': 'thumbnail_url',
        },
        #Draft rankings columns
        'draft_rankings': {
            'id': 'player_id',
            'player_name': 'player_name',
            'firstName': 'player_first_name',
            'lastName': 'player_last_name',
            'headshot': 'headshot',
            'sweaterNumber': 'sweater_number',
            'positionCode': 'position',
            'shootsCatches': 'handedness',
            'heightInInches': 'height_in',
            'weightInPounds': 'weight_lbs',
            'heightInCentimeters': 'height_cm',
            'weightInKilograms': 'weight_kg',
            'birthDate': 'birth_date',
            'birthCity': 'birth_city',
            'birthCountry': 'birth_country',
            'birthStateProvince': 'birth_state_province',
            'lastAmateurClub': 'last_amateur_club',
            'lastAmateurLeague': 'last_amateur_league',
            'midtermRank': 'midterm_rank',
            'finalRank': 'final_rank',
        },
    }
@@ -50,7 +50,7 @@ def prep_plot_data(pbp,events,strengths,marker_dict=event_markers):
50
50
  pbp = wsba_xG(pbp)
51
51
  pbp['xG'] = np.where(pbp['xG'].isna(),0,pbp['xG'])
52
52
 
53
- pbp['WSBA'] = pbp['event_player_1_name']+pbp['season'].astype(str)+pbp['event_team_abbr']
53
+ pbp['WSBA'] = pbp['event_player_1_id'].astype(str)+pbp['season'].astype(str)+pbp['event_team_abbr']
54
54
 
55
55
  pbp['x_plot'] = np.where(pbp['x']<0,-pbp['y_adj'],pbp['y_adj'])
56
56
  pbp['y_plot'] = abs(pbp['x_adj'])
@@ -88,8 +88,8 @@ def plot_skater_shots(pbp, player, season, team, strengths, title = None, marker
88
88
  pbp = pbp.loc[(pbp['season'].astype(str)==season)&((pbp['away_team_abbr']==team)|(pbp['home_team_abbr']==team))]
89
89
 
90
90
  team_data = pd.read_csv(info_path)
91
- team_color = list(team_data.loc[team_data['WSBA']==f'{team}{season}','Primary Color'])[0]
92
- team_color_2nd = list(team_data.loc[team_data['WSBA']==f'{team}{season}','Secondary Color'])[0]
91
+ team_color = list(team_data.loc[team_data['WSBA']==f'{team}{season}','primary_color'])[0]
92
+ team_color_2nd = list(team_data.loc[team_data['WSBA']==f'{team}{season}','secondary_color'])[0]
93
93
 
94
94
  if onice in ['for','against']:
95
95
  skater = pbp.loc[(pbp[f'onice_{onice}'].str.contains(player.upper()))]
@@ -123,8 +123,8 @@ def plot_game_events(pbp,game_id,events,strengths,marker_dict=event_markers,team
123
123
 
124
124
  team_data = pd.read_csv(info_path)
125
125
  team_info ={
126
- 'away_color':'#000000' if list(team_data.loc[team_data['WSBA']==f'{away_abbr}{season}','Secondary Color'])[0]=='#FFFFFF' else list(team_data.loc[team_data['WSBA']==f'{away_abbr}{season}',f'{team_colors['away'].capitalize()} Color'])[0],
127
- 'home_color': list(team_data.loc[team_data['WSBA']==f'{home_abbr}{season}',f'{team_colors['home'].capitalize()} Color'])[0],
126
+ 'away_color':'#000000' if list(team_data.loc[team_data['WSBA']==f'{away_abbr}{season}','secondary_color'])[0]=='#FFFFFF' else list(team_data.loc[team_data['WSBA']==f'{away_abbr}{season}',f'{team_colors['away']}_color'])[0],
127
+ 'home_color': list(team_data.loc[team_data['WSBA']==f'{home_abbr}{season}',f'{team_colors['home']}_color'])[0],
128
128
  'away_logo': f'tools/logos/png/{away_abbr}{season}.png',
129
129
  'home_logo': f'tools/logos/png/{home_abbr}{season}.png',
130
130
  }
@@ -90,18 +90,18 @@ def fix_players(pbp):
90
90
  print('Adding player info to pbp...')
91
91
 
92
92
  #Load roster and all players
93
- roster = pd.read_csv(roster_path).drop_duplicates(['id'])[['fullName','id','shootsCatches']]
93
+ roster = pd.read_csv(roster_path).drop_duplicates(['player_id'])[['player_name','player_id','handedness']]
94
94
 
95
95
  #Some players are missing from the roster file (generally in newer seasons); add these manually
96
- miss = list(pbp.loc[~(pbp['event_player_1_id'].isin(list(roster['id'])))&(pbp['event_player_1_id'].notna()),'event_player_1_id'].drop_duplicates())
96
+ miss = list(pbp.loc[~(pbp['event_player_1_id'].isin(list(roster['player_id'])))&(pbp['event_player_1_id'].notna()),'event_player_1_id'].drop_duplicates())
97
97
  if miss:
98
- add = wsba.nhl_scrape_player_data(miss).rename(columns={'playerId':'id'})[['fullName','id','shootsCatches']]
98
+ add = wsba.nhl_scrape_player_info(miss)[['player_name','player_id','handedness']]
99
99
  roster = pd.concat([roster,add]).reset_index(drop=True)
100
100
 
101
101
  #Conversion dict
102
- roster['id'] = roster['id'].astype(str)
103
- roster_dict = roster.set_index('id').to_dict()['shootsCatches']
104
- names_dict = roster.set_index('id').to_dict()['fullName']
102
+ roster['player_id'] = roster['player_id'].astype(str)
103
+ roster_dict = roster.set_index('player_id').to_dict()['handedness']
104
+ names_dict = roster.set_index('player_id').to_dict()['player_name']
105
105
 
106
106
  #Add player names
107
107
  for i in range(3):
wsba_hockey/wsba_main.py CHANGED
@@ -11,6 +11,7 @@ from wsba_hockey.tools.scraping import *
11
11
  from wsba_hockey.tools.xg_model import *
12
12
  from wsba_hockey.tools.agg import *
13
13
  from wsba_hockey.tools.plotting import *
14
+ from wsba_hockey.tools.columns import col_map
14
15
 
15
16
  ### WSBA HOCKEY ###
16
17
  ## Provided below are all integral functions in the WSBA Hockey Python package. ##
@@ -114,8 +115,19 @@ SCHEDULE_PATH = os.path.join(DIR,'tools\\schedule\\schedule.csv')
114
115
  INFO_PATH = os.path.join(DIR,'tools\\teaminfo\\nhl_teaminfo.csv')
115
116
  DEFAULT_ROSTER = os.path.join(DIR,'tools\\rosters\\nhl_rosters.csv')
116
117
 
118
+ #Load column names for standardization
119
+ COL_MAP = col_map()
120
+
121
#Display labels for the draft-ranking category codes (printed when scraping draft rankings)
DRAFT_CAT = {
    0: 'All Prospects',
    1: 'North American Skaters',
    2: 'International Skaters',
    3: 'North American Goalies',
    4: 'International Goalies',
}
128
+
117
129
  ## SCRAPE FUNCTIONS ##
118
- def nhl_scrape_game(game_ids:list[int], split_shifts:bool = False, remove:list[str] = [], verbose:bool = False, sources:bool = False, errors:bool = False):
130
+ def nhl_scrape_game(game_ids:int | list[int], split_shifts:bool = False, remove:list[str] = [], verbose:bool = False, sources:bool = False, errors:bool = False):
119
131
  """
120
132
  Given a set of game_ids (NHL API), return complete play-by-play information as requested.
121
133
 
@@ -319,24 +331,17 @@ def nhl_scrape_schedule(season:int, start:str = '', end:str = ''):
319
331
  if gameWeek.empty:
320
332
  game.append(gameWeek)
321
333
  else:
322
- gameWeek['date'] = get['gameWeek'][0]['date']
323
-
324
- gameWeek['season_type'] = gameWeek['gameType']
325
- gameWeek['away_team_abbr'] = gameWeek['awayTeam.abbrev']
326
- gameWeek['home_team_abbr'] = gameWeek['homeTeam.abbrev']
327
- gameWeek['game_title'] = gameWeek['away_team_abbr'] + " @ " + gameWeek['home_team_abbr'] + " - " + gameWeek['date']
328
- gameWeek['estStartTime'] = pd.to_datetime(gameWeek['startTimeUTC']).dt.tz_convert('US/Eastern').dt.strftime("%I:%M %p")
329
-
330
- front_col = ['id','season','date','season_type','game_title','away_team_abbr','home_team_abbr','estStartTime']
331
- gameWeek = gameWeek[front_col+[col for col in gameWeek.columns.to_list() if col not in front_col]]
334
+ gameWeek['game_date'] = get['gameWeek'][0]['date']
335
+ gameWeek['game_title'] = gameWeek['awayTeam.abbrev'] + " @ " + gameWeek['homeTeam.abbrev'] + " - " + gameWeek['game_date']
336
+ gameWeek['start_time_est'] = pd.to_datetime(gameWeek['startTimeUTC']).dt.tz_convert('US/Eastern').dt.strftime("%I:%M %p")
332
337
 
333
338
  game.append(gameWeek)
334
339
 
335
- #Concatenate all games
336
- df = pd.concat(game)
340
+ #Concatenate all games and standardize column naming
341
+ df = pd.concat(game).rename(columns=COL_MAP['schedule'],errors='ignore')
337
342
 
338
343
  #Return: specified schedule data
339
- return df
344
+ return df[[col for col in COL_MAP['schedule'].values() if col in df.columns]]
340
345
 
341
346
  def nhl_scrape_season(season:int, split_shifts:bool = False, season_types:list[int] = [2,3], remove:list[str] = [], start:str = '', end:str = '', local:bool=False, local_path:str = SCHEDULE_PATH, verbose:bool = False, sources:bool = False, errors:bool = False):
342
347
  """
@@ -399,13 +404,13 @@ def nhl_scrape_season(season:int, split_shifts:bool = False, season_types:list[i
399
404
 
400
405
  load = load.loc[(load['season']==season)&
401
406
  (load['season_type'].isin(season_types))&
402
- (load['date']>=start)&(load['date']<=end)]
407
+ (load['game_date']>=start)&(load['game_date']<=end)]
403
408
 
404
- game_ids = load['id'].to_list()
409
+ game_ids = load['game_id'].to_list()
405
410
  else:
406
411
  load = nhl_scrape_schedule(season,start,end)
407
412
  load = load.loc[(load['season']==season)&(load['season_type'].isin(season_types))]
408
- game_ids = load['id'].to_list()
413
+ game_ids = load['game_id'].to_list()
409
414
 
410
415
  #If no games found, terminate the process
411
416
  if not game_ids:
@@ -440,10 +445,9 @@ def nhl_scrape_seasons_info(seasons:list[int] = []):
440
445
  A DataFrame containing the information for requested seasons.
441
446
  """
442
447
 
443
- #
444
- # param 'season' - list of seasons to include
445
-
446
448
  print(f'Scraping info for seasons: {seasons}')
449
+
450
+ #Load two different data sources: general season info and standings data related to season
447
451
  api = "https://api.nhle.com/stats/rest/en/season"
448
452
  info = "https://api-web.nhle.com/v1/standings-season"
449
453
  data = rs.get(api).json()['data']
@@ -452,12 +456,17 @@ def nhl_scrape_seasons_info(seasons:list[int] = []):
452
456
  df = pd.json_normalize(data)
453
457
  df_2 = pd.json_normalize(data_2)
454
458
 
455
- df = pd.merge(df,df_2,how='outer',on=['id'])
459
+ #Remove common columns
460
+ df_2 = df_2.drop(columns=['conferencesInUse', 'divisionsInUse', 'pointForOTlossInUse','rowInUse','tiesInUse','wildcardInUse'])
456
461
 
462
+ df = pd.merge(df,df_2,how='outer',on=['id']).rename(columns=COL_MAP['season_info'])
463
+
464
+ df = df[[col for col in COL_MAP['season_info'].values() if col in df.columns]]
465
+
457
466
  if len(seasons) > 0:
458
- return df.loc[df['id'].astype(str).isin(seasons)].sort_values(by=['id'])
467
+ return df.loc[df['season'].isin(seasons)].sort_values(by=['season'])
459
468
  else:
460
- return df.sort_values(by=['id'])
469
+ return df.sort_values(by=['season'])
461
470
 
462
471
  def nhl_scrape_standings(arg:int | list[int] | Literal['now'] = 'now', season_type:int = 2):
463
472
  """
@@ -495,8 +504,11 @@ def nhl_scrape_standings(arg:int | list[int] | Literal['now'] = 'now', season_ty
495
504
  data = rs.get(api).json()['series']
496
505
  dfs.append(pd.json_normalize(data))
497
506
 
507
+ #Combine and standardize columns
508
+ df = pd.concat(dfs).rename(columns=COL_MAP['standings'])
509
+
498
510
  #Return: playoff bracket
499
- return pd.concat(dfs)
511
+ return df[[col for col in COL_MAP['standings'].values() if col in df.columns]]
500
512
 
501
513
  else:
502
514
  if arg == "now":
@@ -526,8 +538,11 @@ def nhl_scrape_standings(arg:int | list[int] | Literal['now'] = 'now', season_ty
526
538
  data = rs.get(api).json()['standings']
527
539
  dfs.append(pd.json_normalize(data))
528
540
 
541
+ #Standardize columns
542
+ df = pd.concat(dfs).rename(columns=COL_MAP['standings'])
543
+
529
544
  #Return: standings data
530
- return pd.concat(dfs)
545
+ return df[[col for col in COL_MAP['standings'].values() if col in df.columns]]
531
546
 
532
547
  def nhl_scrape_roster(season: int):
533
548
  """
@@ -542,33 +557,40 @@ def nhl_scrape_roster(season: int):
542
557
  A DataFrame containing the rosters for all teams in the specified season.
543
558
  """
544
559
 
545
- print("Scrpaing rosters for the "+ season + "season...")
560
+ print(f'Scrpaing rosters for the {season} season...')
546
561
  teaminfo = pd.read_csv(info_path)
547
562
 
548
563
  rosts = []
549
- for team in list(teaminfo['Team']):
564
+ for team in teaminfo['team_abbr'].drop_duplicates():
550
565
  try:
551
- print("Scraping " + team + " roster...")
552
- api = "https://api-web.nhle.com/v1/roster/"+team+"/"+season
566
+ print(f'Scraping {team} roster...')
567
+ api = f'https://api-web.nhle.com/v1/roster/{team}/{season}'
553
568
 
554
569
  data = rs.get(api).json()
555
570
  forwards = pd.json_normalize(data['forwards'])
556
- forwards['headingPosition'] = "F"
571
+ forwards['heading_position'] = "F"
557
572
  dmen = pd.json_normalize(data['defensemen'])
558
- dmen['headingPosition'] = "D"
573
+ dmen['heading_position'] = "D"
559
574
  goalies = pd.json_normalize(data['goalies'])
560
- goalies['headingPosition'] = "G"
575
+ goalies['heading_position'] = "G"
561
576
 
562
577
  roster = pd.concat([forwards,dmen,goalies]).reset_index(drop=True)
563
- roster['fullName'] = (roster['firstName.default']+" "+roster['lastName.default']).str.upper()
578
+ roster['player_name'] = (roster['firstName.default']+" "+roster['lastName.default']).str.upper()
564
579
  roster['season'] = str(season)
565
580
  roster['team_abbr'] = team
566
581
 
567
582
  rosts.append(roster)
568
583
  except:
569
- print("No roster found for " + team + "...")
584
+ print(f'No roster found for {team}...')
570
585
 
571
- return pd.concat(rosts)
586
+ #Combine rosters
587
+ df = pd.concat(rosts)
588
+
589
+ #Standardize columns
590
+ df = df.rename(columns=COL_MAP['roster'])
591
+
592
+ #Return: roster data for provided season
593
+ return df[[col for col in COL_MAP['roster'].values() if col in df.columns]]
572
594
 
573
595
  def nhl_scrape_prospects(team:str):
574
596
  """
@@ -586,16 +608,21 @@ def nhl_scrape_prospects(team:str):
586
608
  api = f'https://api-web.nhle.com/v1/prospects/{team}'
587
609
 
588
610
  data = rs.get(api).json()
589
-
611
+
612
+ print(f'Scraping {team} prospects...')
613
+
590
614
  #Iterate through positions
591
615
  players = [pd.json_normalize(data[pos]) for pos in ['forwards','defensemen','goalies']]
592
616
 
593
617
  prospects = pd.concat(players)
594
618
  #Add name columns
595
- prospects['fullName'] = (prospects['firstName.default']+" "+prospects['lastName.default']).str.upper()
619
+ prospects['player_name'] = (prospects['firstName.default']+" "+prospects['lastName.default']).str.upper()
596
620
 
621
+ #Standardize columns
622
+ prospects = prospects.rename(columns=COL_MAP['prospects'])
623
+
597
624
  #Return: team prospects
598
- return prospects
625
+ return prospects[[col for col in COL_MAP['prospects'].values() if col in prospects.columns]]
599
626
 
600
627
  def nhl_scrape_team_info(country:bool = False):
601
628
  """
@@ -620,9 +647,13 @@ def nhl_scrape_team_info(country:bool = False):
620
647
  data['logo_light'] = 'https://assets.nhle.com/logos/nhl/svg/'+data['triCode']+'_light.svg'
621
648
  data['logo_dark'] = 'https://assets.nhle.com/logos/nhl/svg/'+data['triCode']+'_dark.svg'
622
649
 
623
- return data.sort_values(by=(['country3Code','countryCode','iocCode','countryName'] if country else ['fullName','triCode','id']))
650
+ #Standardize columns
651
+ data = data.rename(columns=COL_MAP['team_info'])
624
652
 
625
- def nhl_scrape_player_data(player_ids:list[int]):
653
+ #Return: team or country info
654
+ return data[[col for col in COL_MAP['team_info'].values() if col in data.columns]].sort_values(by=(['country_abbr','country_name'] if country else ['team_abbr','team_name']))
655
+
656
+ def nhl_scrape_player_info(player_ids:list[int]):
626
657
  """
627
658
  Returns player data for specified players.
628
659
 
@@ -635,6 +666,11 @@ def nhl_scrape_player_data(player_ids:list[int]):
635
666
  A DataFrame containing player data for specified players.
636
667
  """
637
668
 
669
+ print(f'Retreiving player information for {player_ids}...')
670
+
671
+ #Wrap player_ids in a list if only a single player_id is provided
672
+ player_ids = [player_ids] if type(player_ids) != list else player_ids
673
+
638
674
  infos = []
639
675
  for player_id in player_ids:
640
676
  player_id = int(player_id)
@@ -642,7 +678,7 @@ def nhl_scrape_player_data(player_ids:list[int]):
642
678
 
643
679
  data = pd.json_normalize(rs.get(api).json())
644
680
  #Add name column
645
- data['fullName'] = (data['firstName.default'] + " " + data['lastName.default']).str.upper()
681
+ data['player_name'] = (data['firstName.default'] + " " + data['lastName.default']).str.upper()
646
682
 
647
683
  #Append
648
684
  infos.append(data)
@@ -650,8 +686,11 @@ def nhl_scrape_player_data(player_ids:list[int]):
650
686
  if infos:
651
687
  df = pd.concat(infos)
652
688
 
689
+ #Standardize columns
690
+ df = df.rename(columns=COL_MAP['player_info'])
691
+
653
692
  #Return: player data
654
- return df
693
+ return df[[col for col in COL_MAP['player_info'].values() if col in df.columns]]
655
694
  else:
656
695
  return pd.DataFrame()
657
696
 
@@ -666,8 +705,8 @@ def nhl_scrape_draft_rankings(arg:str | Literal['now'] = 'now', category:int = 0
666
705
 
667
706
  - Category 1 is North American Skaters.
668
707
  - Category 2 is International Skaters.
669
- - Category 3 is North American Goalie.
670
- - Category 4 is International Goalie
708
+ - Category 3 is North American Goalies.
709
+ - Category 4 is International Goalies
671
710
 
672
711
  Default is 0 (all prospects).
673
712
  Returns:
@@ -675,15 +714,26 @@ def nhl_scrape_draft_rankings(arg:str | Literal['now'] = 'now', category:int = 0
675
714
  A DataFrame containing draft rankings.
676
715
  """
677
716
 
717
+ print(f'Scraping draft rankings for {arg}...\nCategory: {DRAFT_CAT[category]}...')
718
+
678
719
  #Player category only applies when requesting a specific season
679
720
  api = f"https://api-web.nhle.com/v1/draft/rankings/{arg}/{category}" if category > 0 else f"https://api-web.nhle.com/v1/draft/rankings/{arg}"
680
721
  data = pd.json_normalize(rs.get(api).json()['rankings'])
681
722
 
682
723
  #Add player name columns
683
- data['fullName'] = (data['firstName']+" "+data['lastName']).str.upper()
724
+ data['player_name'] = (data['firstName']+" "+data['lastName']).str.upper()
725
+
726
+ #Fix positions
727
+ data['positionCode'] = data['positionCode'].replace({
728
+ 'LW':'L',
729
+ 'RW':'R'
730
+ })
731
+
732
+ #Standardize columns
733
+ data = data.rename(columns=COL_MAP['draft_rankings'])
684
734
 
685
735
  #Return: prospect rankings
686
- return data
736
+ return data[[col for col in COL_MAP['draft_rankings'].values() if col in data.columns]]
687
737
 
688
738
  def nhl_scrape_game_info(game_ids:list[int]):
689
739
  """
@@ -698,6 +748,9 @@ def nhl_scrape_game_info(game_ids:list[int]):
698
748
  A DataFrame containing information for each game.
699
749
  """
700
750
 
751
+ #Wrap game_id in a list if only a single game_id is provided
752
+ game_ids = [game_ids] if type(game_ids) != list else game_ids
753
+
701
754
  print(f'Finding game information for games: {game_ids}')
702
755
 
703
756
  link = 'https://api-web.nhle.com/v1/gamecenter'
@@ -706,18 +759,15 @@ def nhl_scrape_game_info(game_ids:list[int]):
706
759
  df = pd.concat([pd.json_normalize(rs.get(f'{link}/{game_id}/landing').json()) for game_id in game_ids])
707
760
 
708
761
  #Add extra info
709
- df['date'] = df['gameDate']
710
- df['season_type'] = df['gameType']
711
- df['away_team_abbr'] = df['awayTeam.abbrev']
712
- df['home_team_abbr'] = df['homeTeam.abbrev']
713
- df['game_title'] = df['away_team_abbr'] + " @ " + df['home_team_abbr'] + " - " + df['date']
714
- df['estStartTime'] = pd.to_datetime(df['startTimeUTC']).dt.tz_convert('US/Eastern').dt.strftime("%I:%M %p")
762
+ df['game_date'] = df['gameDate']
763
+ df['game_title'] = df['awayTeam.abbrev'] + " @ " + df['homeTeam.abbrev'] + " - " + df['game_date']
764
+ df['start_time_est'] = pd.to_datetime(df['startTimeUTC']).dt.tz_convert('US/Eastern').dt.strftime("%I:%M %p")
715
765
 
716
- front_col = ['id','season','date','season_type','game_title','away_team_abbr','home_team_abbr','estStartTime']
717
- df = df[front_col+[col for col in df.columns.to_list() if col not in front_col]]
766
+ #Standardize columns
767
+ df = df.rename(columns=COL_MAP['schedule'])
718
768
 
719
769
  #Return: game information
720
- return df
770
+ return df[[col for col in COL_MAP['schedule'].values() if col in df.columns]]
721
771
 
722
772
 
723
773
  def nhl_apply_xG(pbp: pd.DataFrame):
@@ -1172,23 +1222,23 @@ def nhl_calculate_stats(pbp:pd.DataFrame, type:Literal['skater','goalie','team']
1172
1222
 
1173
1223
  #Import rosters and player info
1174
1224
  rosters = pd.read_csv(roster_path)
1175
- names = rosters[['id','fullName',
1176
- 'headshot','positionCode','shootsCatches',
1177
- 'heightInInches','weightInPounds',
1178
- 'birthDate','birthCountry']].drop_duplicates(subset=['id','fullName'],keep='last')
1225
+ names = rosters[['player_id','player_name',
1226
+ 'headshot','position','handedness',
1227
+ 'height_in','weight_lbs',
1228
+ 'birth_date','birth_country']].drop_duplicates(subset=['player_id','player_name'],keep='last')
1179
1229
 
1180
1230
  #Add names
1181
- complete = pd.merge(complete,names,how='left',left_on='ID',right_on='id')
1231
+ complete = pd.merge(complete,names,how='left',left_on='ID',right_on='player_id')
1182
1232
 
1183
1233
  #Rename if there are no missing names
1184
- complete = complete.rename(columns={'fullName':'Goalie',
1234
+ complete = complete.rename(columns={'player_name':'Goalie',
1185
1235
  'headshot':'Headshot',
1186
- 'positionCode':'Position',
1187
- 'shootsCatches':'Handedness',
1188
- 'heightInInches':'Height (in)',
1189
- 'weightInPounds':'Weight (lbs)',
1190
- 'birthDate':'Birthday',
1191
- 'birthCountry':'Nationality'})
1236
+ 'position':'Position',
1237
+ 'handedness':'Handedness',
1238
+ 'height_in':'Height (in)',
1239
+ 'weight_lbs':'Weight (lbs)',
1240
+ 'birth_date':'Birthday',
1241
+ 'birth_country':'Nationality'})
1192
1242
 
1193
1243
  #WSBA
1194
1244
  complete['WSBA'] = complete['ID'].astype(str).str.replace('.0','')+complete['Team']+complete['Season'].astype(str)
@@ -1300,23 +1350,23 @@ def nhl_calculate_stats(pbp:pd.DataFrame, type:Literal['skater','goalie','team']
1300
1350
 
1301
1351
  #Import rosters and player info
1302
1352
  rosters = pd.read_csv(roster_path)
1303
- names = rosters[['id','fullName',
1304
- 'headshot','positionCode','shootsCatches',
1305
- 'heightInInches','weightInPounds',
1306
- 'birthDate','birthCountry']].drop_duplicates(subset=['id','fullName'],keep='last')
1353
+ names = rosters[['player_id','player_name',
1354
+ 'headshot','position','handedness',
1355
+ 'height_in','weight_lbs',
1356
+ 'birth_date','birth_country']].drop_duplicates(subset=['player_id','player_name'],keep='last')
1307
1357
 
1308
1358
  #Add names
1309
- complete = pd.merge(complete,names,how='left',left_on='ID',right_on='id')
1359
+ complete = pd.merge(complete,names,how='left',left_on='ID',right_on='player_id')
1310
1360
 
1311
1361
  #Rename if there are no missing names
1312
- complete = complete.rename(columns={'fullName':'Player',
1362
+ complete = complete.rename(columns={'player_name':'Player',
1313
1363
  'headshot':'Headshot',
1314
- 'positionCode':'Position',
1315
- 'shootsCatches':'Handedness',
1316
- 'heightInInches':'Height (in)',
1317
- 'weightInPounds':'Weight (lbs)',
1318
- 'birthDate':'Birthday',
1319
- 'birthCountry':'Nationality'})
1364
+ 'position':'Position',
1365
+ 'handedness':'Handedness',
1366
+ 'height_in':'Height (in)',
1367
+ 'weight_lbs':'Weight (lbs)',
1368
+ 'birth_date':'Birthday',
1369
+ 'birth_country':'Nationality'})
1320
1370
 
1321
1371
  #Set TOI to minutes
1322
1372
  complete['TOI'] = complete['TOI']/60
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: wsba_hockey
3
- Version: 1.2.2
3
+ Version: 1.2.4
4
4
  Summary: WeakSide Breakout's complete Python package of access to hockey data, primarily including the scraping of National Hockey League schedule, play-by-play, and shifts information.
5
5
  Author-email: Owen Singh <owenbksingh@gmail.com>
6
6
  Project-URL: Homepage, https://github.com/owensingh38/wsba_hockey/
@@ -0,0 +1,17 @@
1
+ wsba_hockey/__init__.py,sha256=DozjAXICrGwXBCF1ej2rgRZw6BtYtNimLqddD0gO4gM,595
2
+ wsba_hockey/wsba_main.py,sha256=BvDcMv3SwlvCbuJgsAf2qo0naZ8v27ErPyDP09DIylU,76187
3
+ wsba_hockey/tools/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
4
+ wsba_hockey/tools/agg.py,sha256=OkIYd-ApvGVYe2JJLOI21jnDIN5LH8nkeH7eo0reWFI,23364
5
+ wsba_hockey/tools/columns.py,sha256=S3956AVFGMyXdk86ssQqJrwQC-N7tWT3PQ_884W33w8,14020
6
+ wsba_hockey/tools/game_pred.py,sha256=OGh6o1vIcyLUixU80hOO0RPGNmDSY1cvvCNZFcP0wL4,1308
7
+ wsba_hockey/tools/plotting.py,sha256=Ix-Kj-FEP4_uwSwyb5lIRj3tOFdO1z7XUjWManAkY24,6013
8
+ wsba_hockey/tools/scraping.py,sha256=6_GyF8o56fuijTosm4x4OSrvpL61ZygluK2A26XajqU,52246
9
+ wsba_hockey/tools/xg_model.py,sha256=rY6D1YMuSZkORqHd8ZCp-2gBmH9KUG0DoyHEiXpzWlg,18708
10
+ wsba_hockey/tools/archive/old_scraping.py,sha256=hEjMI1RtfeZnf0RBiJFI38oXkLZ3WofeH5xqcF4pzgM,49585
11
+ wsba_hockey/tools/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
12
+ wsba_hockey/tools/utils/shared.py,sha256=KxeQVttGem73yncAlnuZvTclqpJpoerTKtLusRh5zsk,2472
13
+ wsba_hockey-1.2.4.dist-info/licenses/LICENSE,sha256=Nr_Um1Pd5FQJTWWgm7maZArdtYMbDhzXYSwyJIZDGik,1114
14
+ wsba_hockey-1.2.4.dist-info/METADATA,sha256=ObmBmiBvwQ3KIk17J0ydi5D2j-ovatxGbj51ksY47qw,3592
15
+ wsba_hockey-1.2.4.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
16
+ wsba_hockey-1.2.4.dist-info/top_level.txt,sha256=acU7s3x-RZC1zGiqCOmO0g267iqCg34lzIfdmYxxGmQ,12
17
+ wsba_hockey-1.2.4.dist-info/RECORD,,
@@ -1,16 +0,0 @@
1
- wsba_hockey/__init__.py,sha256=4wdj-GjqGGb3BnnyLlvRXYS7wNoaLAzkfVnz6kM8v7g,566
2
- wsba_hockey/wsba_main.py,sha256=iXh4OSgqLLuxBV-5WthQ-2ISESo3qusm9InM_K7KQKc,74170
3
- wsba_hockey/tools/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
4
- wsba_hockey/tools/agg.py,sha256=OkIYd-ApvGVYe2JJLOI21jnDIN5LH8nkeH7eo0reWFI,23364
5
- wsba_hockey/tools/game_pred.py,sha256=OGh6o1vIcyLUixU80hOO0RPGNmDSY1cvvCNZFcP0wL4,1308
6
- wsba_hockey/tools/plotting.py,sha256=81hBaM7tcwUNB4-tovPn7QreOUz6B2NuI_SR4-djVSk,6029
7
- wsba_hockey/tools/scraping.py,sha256=6_GyF8o56fuijTosm4x4OSrvpL61ZygluK2A26XajqU,52246
8
- wsba_hockey/tools/xg_model.py,sha256=OqSvr1Er3zGaY6ZTBnuulBTPLO6CPhNk97SwpnkRD6M,18686
9
- wsba_hockey/tools/archive/old_scraping.py,sha256=hEjMI1RtfeZnf0RBiJFI38oXkLZ3WofeH5xqcF4pzgM,49585
10
- wsba_hockey/tools/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
11
- wsba_hockey/tools/utils/shared.py,sha256=KxeQVttGem73yncAlnuZvTclqpJpoerTKtLusRh5zsk,2472
12
- wsba_hockey-1.2.2.dist-info/licenses/LICENSE,sha256=Nr_Um1Pd5FQJTWWgm7maZArdtYMbDhzXYSwyJIZDGik,1114
13
- wsba_hockey-1.2.2.dist-info/METADATA,sha256=8lZANGgWtAMVI7QfEr2cP66kM8WzUXv-YYeV6z1IL4A,3592
14
- wsba_hockey-1.2.2.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
15
- wsba_hockey-1.2.2.dist-info/top_level.txt,sha256=acU7s3x-RZC1zGiqCOmO0g267iqCg34lzIfdmYxxGmQ,12
16
- wsba_hockey-1.2.2.dist-info/RECORD,,