wsba-hockey 1.2.2__tar.gz → 1.2.3__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (22)
  1. {wsba_hockey-1.2.2/src/wsba_hockey.egg-info → wsba_hockey-1.2.3}/PKG-INFO +1 -1
  2. {wsba_hockey-1.2.2 → wsba_hockey-1.2.3}/pyproject.toml +1 -1
  3. {wsba_hockey-1.2.2 → wsba_hockey-1.2.3}/src/wsba_hockey/__init__.py +1 -0
  4. wsba_hockey-1.2.3/src/wsba_hockey/tools/columns.py +281 -0
  5. {wsba_hockey-1.2.2 → wsba_hockey-1.2.3}/src/wsba_hockey/tools/plotting.py +5 -5
  6. {wsba_hockey-1.2.2 → wsba_hockey-1.2.3}/src/wsba_hockey/tools/xg_model.py +6 -6
  7. {wsba_hockey-1.2.2 → wsba_hockey-1.2.3}/src/wsba_hockey/wsba_main.py +121 -78
  8. {wsba_hockey-1.2.2 → wsba_hockey-1.2.3/src/wsba_hockey.egg-info}/PKG-INFO +1 -1
  9. {wsba_hockey-1.2.2 → wsba_hockey-1.2.3}/src/wsba_hockey.egg-info/SOURCES.txt +1 -0
  10. {wsba_hockey-1.2.2 → wsba_hockey-1.2.3}/LICENSE +0 -0
  11. {wsba_hockey-1.2.2 → wsba_hockey-1.2.3}/README.md +0 -0
  12. {wsba_hockey-1.2.2 → wsba_hockey-1.2.3}/setup.cfg +0 -0
  13. {wsba_hockey-1.2.2 → wsba_hockey-1.2.3}/src/wsba_hockey/tools/__init__.py +0 -0
  14. {wsba_hockey-1.2.2 → wsba_hockey-1.2.3}/src/wsba_hockey/tools/agg.py +0 -0
  15. {wsba_hockey-1.2.2 → wsba_hockey-1.2.3}/src/wsba_hockey/tools/archive/old_scraping.py +0 -0
  16. {wsba_hockey-1.2.2 → wsba_hockey-1.2.3}/src/wsba_hockey/tools/game_pred.py +0 -0
  17. {wsba_hockey-1.2.2 → wsba_hockey-1.2.3}/src/wsba_hockey/tools/scraping.py +0 -0
  18. {wsba_hockey-1.2.2 → wsba_hockey-1.2.3}/src/wsba_hockey/tools/utils/__init__.py +0 -0
  19. {wsba_hockey-1.2.2 → wsba_hockey-1.2.3}/src/wsba_hockey/tools/utils/shared.py +0 -0
  20. {wsba_hockey-1.2.2 → wsba_hockey-1.2.3}/src/wsba_hockey.egg-info/dependency_links.txt +0 -0
  21. {wsba_hockey-1.2.2 → wsba_hockey-1.2.3}/src/wsba_hockey.egg-info/top_level.txt +0 -0
  22. {wsba_hockey-1.2.2 → wsba_hockey-1.2.3}/tests/tests.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: wsba_hockey
3
- Version: 1.2.2
3
+ Version: 1.2.3
4
4
  Summary: WeakSide Breakout's complete Python package of access to hockey data, primairly including the scraping of National Hockey League schedule, play-by-play, and shifts information.
5
5
  Author-email: Owen Singh <owenbksingh@gmail.com>
6
6
  Project-URL: Homepage, https://github.com/owensingh38/wsba_hockey/
@@ -1,6 +1,6 @@
1
1
  [project]
2
2
  name = "wsba_hockey"
3
- version = "1.2.2"
3
+ version = "1.2.3"
4
4
  authors = [
5
5
  { name="Owen Singh", email="owenbksingh@gmail.com" },
6
6
  ]
@@ -7,6 +7,7 @@ from wsba_hockey.wsba_main import (
7
7
  nhl_scrape_roster,
8
8
  nhl_scrape_draft_rankings,
9
9
  nhl_scrape_prospects,
10
+ nhl_scrape_player_info,
10
11
  nhl_scrape_team_info,
11
12
  nhl_scrape_game_info,
12
13
  nhl_calculate_stats,
@@ -0,0 +1,281 @@
1
+ ### COLUMN NAMES ###
2
+ # Provided in this file are dicts to standardize column names as necessary #
3
+
4
+ def col_map():
5
+ return {
6
+ 'schedule':{
7
+ 'id':'game_id',
8
+ 'game_date':'game_date',
9
+ 'gameType':'season_type',
10
+ 'neutralSite':'neutral_site',
11
+ 'startTimeUTC':'start_time_utc',
12
+ 'start_time_est':'start_time_est',
13
+ 'easternUTCOffset':'eastern_utc',
14
+ 'venueUTCOffset':'venue_utc',
15
+ 'venueTimezone':'venue_timezone',
16
+ 'gameState':'game_state',
17
+ 'gameScheduleState':'game_schedule_state',
18
+ 'tvBroadcasts':'tv_broadcasts',
19
+ 'threeMinRecap':'three_min_recap',
20
+ 'threeMinRecapFr':'three_min_recap_fr',
21
+ 'condensedGame':'condensed_game',
22
+ 'condensedGameFr':'condensed_game_fr',
23
+ 'gameCenterLink':'gamecenter_link',
24
+ 'venue.default':'venue',
25
+ 'awayTeam.id':'away_team_id',
26
+ 'homeTeam.id':'home_team_id',
27
+ 'awayTeam.abbrev':'away_team_abbr',
28
+ 'homeTeam.abbrev':'home_team_abbr',
29
+ 'awayTeam.darkLogo':'away_team_logo',
30
+ 'homeTeam.darkLogo':'home_team_logo',
31
+ 'awayTeam.awaySplitSquad':'away_team_split_squad',
32
+ 'homeTeam.homeSplitSquad':'home_team_split_squad',
33
+ 'awayTeam.score':'away_score',
34
+ 'homeTeam.score':'home_score',
35
+ 'periodDescriptor.number':'period',
36
+ 'periodDescriptor.maxRegulationPeriods':'period_max_regulation',
37
+ 'periodDescriptor.periodType':'period_type',
38
+ 'gameOutcome.lastPeriodType':'period_type_last',
39
+ 'seriesUrl':'series_url',
40
+ 'seriesStatus.round':'series_round',
41
+ 'seriesStatus.seriesAbbrev':'series_abbr',
42
+ 'seriesStatus.seriesTitle':'series_name',
43
+ 'seriesStatus.seriesLetter':'series_letter',
44
+ 'seriesStatus.neededToWin':'series_games_needed_to_win',
45
+ 'seriesStatus.gameNumberOfSeries':'series_game_num',
46
+ 'seriesStatus.topSeedTeamAbbrev':'top_seed_team_abbr',
47
+ 'seriesStatus.bottomSeedTeamAbbrev':'bottom_seed_team_abbr',
48
+ 'seriesStatus.topSeedWins':'top_seed_wins',
49
+ 'seriesStatus.bottomSeedWins':'bottom_seed_wins'
50
+ },
51
+ 'season_info':{
52
+ 'id':'season',
53
+ 'formattedSeasonId':'season_name',
54
+ 'seasonOrdinal':'season_ordinal',
55
+ 'startDate':'start_date',
56
+ 'endDate':'end_date',
57
+ 'numberOfGames':'games_num',
58
+ 'totalRegularSeasonGames':'regular_season_total_games_num',
59
+ 'totalPlayoffGames':'playoffs_total_games_num',
60
+ 'regularSeasonStartDate':'regular_season_start_date',
61
+ 'regularSeasonEndDate':'regular_season_end_date',
62
+ 'standingsStart':'standings_start_date',
63
+ 'standingsEnd':'standings_end_date',
64
+ 'allStarGameInUse':'all_star_game',
65
+ 'conferencesInUse':'conferences',
66
+ 'divisionsInUse':'divisions',
67
+ 'wildcardInUse':'wildcard',
68
+ 'entryDraftInUse':'entry_draft',
69
+ 'nhlStanleyCupOwner':'nhl_stanley_cup_owner',
70
+ 'olympicsParticipation':'olympic_participation',
71
+ 'tiesInUse':'ties',
72
+ 'pointForOTLossInUse':'ot_loss_point',
73
+ 'regulationWinsInUse':'reg_wins',
74
+ 'rowInUse':'row',
75
+ 'supplementalDraftInUse':'supplemental_draft'
76
+ },
77
+ 'standings':{
78
+ "clinchIndicator": "clinch_indicator",
79
+ "conferenceAbbrev": "conference_abbr",
80
+ "conferenceHomeSequence": "conference_home_sequence",
81
+ "conferenceL10Sequence": "conference_l10_sequence",
82
+ "conferenceName": "conference_name",
83
+ "conferenceRoadSequence": "conference_road_sequence",
84
+ "conferenceSequence": "conference_sequence",
85
+ "date": "date",
86
+ "divisionAbbrev": "division_abbr",
87
+ "divisionHomeSequence": "division_home_sequence",
88
+ "divisionL10Sequence": "division_l10_sequence",
89
+ "divisionName": "division_name",
90
+ "divisionRoadSequence": "division_road_sequence",
91
+ "divisionSequence": "division_sequence",
92
+ "gameTypeId": "game_type_id",
93
+ "gamesPlayed": "GP",
94
+ "goalDifferential": "GD",
95
+ "goalDifferentialPctg": "GD%",
96
+ "goalAgainst": "GA",
97
+ "goalFor": "GF",
98
+ "goalsForPctg": "GF%",
99
+ "homeGamesPlayed": "HGP",
100
+ "homeGoalDifferential": "HGD",
101
+ "homeGoalsAgainst": "HGA",
102
+ "homeGoalsFor": "HGF",
103
+ "homeLosses": "HL",
104
+ "homeOtLosses": "HOL",
105
+ "homePoints": "HPTS",
106
+ "homeRegulationPlusOtWins": "HROW",
107
+ "homeRegulationWins": "HRW",
108
+ "homeTies": "HT",
109
+ "homeWins": "HW",
110
+ "l10GamesPlayed": "l10_GP",
111
+ "l10GoalDifferential": "l10_GD",
112
+ "l10GoalsAgainst": "l10_GA",
113
+ "l10GoalsFor": "l10_GF",
114
+ "l10Losses": "l10_L",
115
+ "l10OtLosses": "l10_OTL",
116
+ "l10Points": "l10_PTS",
117
+ "l10RegulationPlusOtWins": "l10_ROW",
118
+ "l10RegulationWins": "l10_RW",
119
+ "l10Ties": "l10_T",
120
+ "l10Wins": "l10_W",
121
+ "leagueHomeSequence": "league_home_sequence",
122
+ "leagueL10Sequence": "league_l10_sequence",
123
+ "leagueRoadSequence": "league_road_sequence",
124
+ "leagueSequence": "league_sequence",
125
+ "losses": "L",
126
+ "otLosses": "OTL",
127
+ "pointPctg": "PTS%",
128
+ "points": "PTS",
129
+ "regulationPlusOtWinPctg": "ROW%",
130
+ "regulationPlusOtWins": "ROW",
131
+ "regulationWinPctg": "RW%",
132
+ "regulationWins": "RW",
133
+ "roadGamesPlayed": "AGP",
134
+ "roadGoalDifferential": "AGD",
135
+ "roadGoalsAgainst": "AGA",
136
+ "roadGoalsFor": "AGF",
137
+ "roadLosses": "AL",
138
+ "roadOtLosses": "AOTL",
139
+ "roadPoints": "APTS",
140
+ "roadRegulationPlusOtWins": "AROW",
141
+ "roadRegulationWins": "ARW",
142
+ "roadTies": "AT",
143
+ "roadWins": "AW",
144
+ "seasonId": "season",
145
+ "shootoutLosses": "SOL",
146
+ "shootoutWins": "SOW",
147
+ "streakCode": "streak_code",
148
+ "streakCount": "streak_count",
149
+ "teamLogo": "team_logo",
150
+ "ties": "T",
151
+ "waiversSequence": "waivers_sequence",
152
+ "wildcardSequence": "wildcard_sequence",
153
+ "winPctg": "W%",
154
+ "wins": "W",
155
+ "placeName.default": "place_name",
156
+ "teamName.default": "team_name",
157
+ "teamCommonName.default": "team_common_name",
158
+ "teamAbbrev.default": "team_abbr"
159
+ },
160
+ 'roster':{
161
+ "id": "player_id",
162
+ "team_abbr": "team_abbr",
163
+ "season": "season",
164
+ "headshot": "headshot",
165
+ "sweaterNumber": "sweater_number",
166
+ "positionCode": "position",
167
+ "heading_position": "heading_position",
168
+ "shootsCatches": "handedness",
169
+ "heightInInches": "height_in",
170
+ "weightInPounds": "weight_lbs",
171
+ "heightInCentimeters": "height_cm",
172
+ "weightInKilograms": "weight_kg",
173
+ "birthDate": "birth_date",
174
+ "birthCountry": "birth_country",
175
+ "player_name":"player_name",
176
+ "firstName.default": "player_first_name",
177
+ "lastName.default": "player_last_name",
178
+ "birthCity.default": "birth_city",
179
+ "birthStateProvince.default": "birth_state_province"
180
+ },
181
+ 'prospects':{
182
+ "id": "player_id",
183
+ "headshot": "headshot",
184
+ "sweaterNumber": "sweater_number",
185
+ "positionCode": "position",
186
+ "shootsCatches": "handedness",
187
+ "heightInInches": "height_in",
188
+ "weightInPounds": "weight_lbs",
189
+ "heightInCentimeters": "height_cm",
190
+ "weightInKilograms": "weight_kg",
191
+ "birthDate": "birth_date",
192
+ "birthCountry": "birth_country",
193
+ "player_name":"player_name",
194
+ "firstName.default": "player_first_name",
195
+ "lastName.default": "player_last_name",
196
+ "birthCity.default": "birth_city",
197
+ "birthStateProvince.default": "birth_state_province",
198
+ },
199
+ 'player_info':{
200
+ 'playerId': 'player_id',
201
+ 'player_name': 'player_name',
202
+ 'isActive': 'is_active',
203
+ 'currentTeamId': 'current_team_id',
204
+ 'currentTeamAbbrev': 'current_team_abbr',
205
+ 'badges': 'badges',
206
+ 'teamLogo': 'team_logo',
207
+ 'sweaterNumber': 'sweater_number',
208
+ 'position': 'position',
209
+ 'headshot': 'headshot',
210
+ 'heroImage': 'hero_image',
211
+ 'heightInInches': 'height_in',
212
+ 'heightInCentimeters': 'height_cm',
213
+ 'weightInPounds': 'weight_lbs',
214
+ 'weightInKilograms': 'weight_kg',
215
+ 'birthDate': 'birth_date',
216
+ 'birthCountry': 'birth_country',
217
+ 'shootsCatches': 'handedness',
218
+ 'playerSlug': 'player_slug',
219
+ 'inTop100AllTime': 'in_top_100_all_time',
220
+ 'inHHOF': 'in_hhof',
221
+ 'shopLink': 'shop_link',
222
+ 'twitterLink': 'twitter_link',
223
+ 'watchLink': 'watch_link',
224
+ 'last5Games': 'last_5_games',
225
+ 'seasonTotals': 'season_totals',
226
+ 'awards': 'awards',
227
+ 'currentTeamRoster': 'current_team_roster',
228
+ 'fullTeamName.default': 'full_team_name',
229
+ 'teamCommonName.default': 'team_common_name',
230
+ 'teamPlaceNameWithPreposition.default': 'team_place_name_with_preposition',
231
+ 'firstName.default': 'player_first_name',
232
+ 'lastName.default': 'player_last_name',
233
+ 'birthCity.default': 'birth_city',
234
+ 'birthStateProvince.default': 'birth_state_province',
235
+ 'draftDetails.year': 'draft_year',
236
+ 'draftDetails.teamAbbrev': 'draft_team_abbr',
237
+ 'draftDetails.round': 'draft_round',
238
+ 'draftDetails.pickInRound': 'draft_pick_in_round',
239
+ 'draftDetails.overallPick': 'draft_overall_pick',
240
+ },
241
+ 'team_info':{
242
+ "id":"team_id",
243
+ "franchiseId":"franchise_id",
244
+ "fullName":"team_name",
245
+ "leagueId":"league_id",
246
+ "triCode":"team_abbr",
247
+ "logo_light":"logo_light",
248
+ "logo_dark":"logo_dark",
249
+ "country3Code":"country_abbr",
250
+ "countryCode":"country_abbr_2",
251
+ "countryName":"country_name",
252
+ "hasPlayerStats":"has_player_stats",
253
+ "imageUrl":"country_flag_large",
254
+ "isActive":"is_active",
255
+ "nationalityName":"nationality",
256
+ "olympicUrl":"olympic_url",
257
+ "thumbnailUrl":"thumbnail_url"
258
+ },
259
+ 'draft_rankings':{
260
+ "id": "player_id",
261
+ "player_name":"player_name",
262
+ "firstName": "player_first_name",
263
+ "lastName": "player_last_name",
264
+ "headshot": "headshot",
265
+ "sweaterNumber": "sweater_number",
266
+ "positionCode": "position",
267
+ "shootsCatches": "handedness",
268
+ "heightInInches": "height_in",
269
+ "weightInPounds": "weight_lbs",
270
+ "heightInCentimeters": "height_cm",
271
+ "weightInKilograms": "weight_kg",
272
+ "birthDate": "birth_date",
273
+ "birthCity": "birth_city",
274
+ "birthCountry": "birth_country",
275
+ "birthStateProvince": "birth_state_province",
276
+ "lastAmateurClub": "last_amateur_club",
277
+ "lastAmateurLeague": "last_amateur_league",
278
+ "midtermRank":"midterm_rank",
279
+ "finalRank":"final_rank"
280
+ }
281
+ }
@@ -50,7 +50,7 @@ def prep_plot_data(pbp,events,strengths,marker_dict=event_markers):
50
50
  pbp = wsba_xG(pbp)
51
51
  pbp['xG'] = np.where(pbp['xG'].isna(),0,pbp['xG'])
52
52
 
53
- pbp['WSBA'] = pbp['event_player_1_name']+pbp['season'].astype(str)+pbp['event_team_abbr']
53
+ pbp['WSBA'] = pbp['event_player_1_id'].astype(str)+pbp['season'].astype(str)+pbp['event_team_abbr']
54
54
 
55
55
  pbp['x_plot'] = np.where(pbp['x']<0,-pbp['y_adj'],pbp['y_adj'])
56
56
  pbp['y_plot'] = abs(pbp['x_adj'])
@@ -88,8 +88,8 @@ def plot_skater_shots(pbp, player, season, team, strengths, title = None, marker
88
88
  pbp = pbp.loc[(pbp['season'].astype(str)==season)&((pbp['away_team_abbr']==team)|(pbp['home_team_abbr']==team))]
89
89
 
90
90
  team_data = pd.read_csv(info_path)
91
- team_color = list(team_data.loc[team_data['WSBA']==f'{team}{season}','Primary Color'])[0]
92
- team_color_2nd = list(team_data.loc[team_data['WSBA']==f'{team}{season}','Secondary Color'])[0]
91
+ team_color = list(team_data.loc[team_data['WSBA']==f'{team}{season}','primary_color'])[0]
92
+ team_color_2nd = list(team_data.loc[team_data['WSBA']==f'{team}{season}','secondary_color'])[0]
93
93
 
94
94
  if onice in ['for','against']:
95
95
  skater = pbp.loc[(pbp[f'onice_{onice}'].str.contains(player.upper()))]
@@ -123,8 +123,8 @@ def plot_game_events(pbp,game_id,events,strengths,marker_dict=event_markers,team
123
123
 
124
124
  team_data = pd.read_csv(info_path)
125
125
  team_info ={
126
- 'away_color':'#000000' if list(team_data.loc[team_data['WSBA']==f'{away_abbr}{season}','Secondary Color'])[0]=='#FFFFFF' else list(team_data.loc[team_data['WSBA']==f'{away_abbr}{season}',f'{team_colors['away'].capitalize()} Color'])[0],
127
- 'home_color': list(team_data.loc[team_data['WSBA']==f'{home_abbr}{season}',f'{team_colors['home'].capitalize()} Color'])[0],
126
+ 'away_color':'#000000' if list(team_data.loc[team_data['WSBA']==f'{away_abbr}{season}','secondary_color'])[0]=='#FFFFFF' else list(team_data.loc[team_data['WSBA']==f'{away_abbr}{season}',f'{team_colors['away']}_color'])[0],
127
+ 'home_color': list(team_data.loc[team_data['WSBA']==f'{home_abbr}{season}',f'{team_colors['home']}_color'])[0],
128
128
  'away_logo': f'tools/logos/png/{away_abbr}{season}.png',
129
129
  'home_logo': f'tools/logos/png/{home_abbr}{season}.png',
130
130
  }
@@ -90,18 +90,18 @@ def fix_players(pbp):
90
90
  print('Adding player info to pbp...')
91
91
 
92
92
  #Load roster and all players
93
- roster = pd.read_csv(roster_path).drop_duplicates(['id'])[['fullName','id','shootsCatches']]
93
+ roster = pd.read_csv(roster_path).drop_duplicates(['player_id'])[['player_name','player_id','handedness']]
94
94
 
95
95
  #Some players are missing from the roster file (generally in newer seasons); add these manually
96
- miss = list(pbp.loc[~(pbp['event_player_1_id'].isin(list(roster['id'])))&(pbp['event_player_1_id'].notna()),'event_player_1_id'].drop_duplicates())
96
+ miss = list(pbp.loc[~(pbp['event_player_1_id'].isin(list(roster['player_id'])))&(pbp['event_player_1_id'].notna()),'event_player_1_id'].drop_duplicates())
97
97
  if miss:
98
- add = wsba.nhl_scrape_player_data(miss).rename(columns={'playerId':'id'})[['fullName','id','shootsCatches']]
98
+ add = wsba.nhl_scrape_player_info(miss)[['player_name','player_id','handedness']]
99
99
  roster = pd.concat([roster,add]).reset_index(drop=True)
100
100
 
101
101
  #Conversion dict
102
- roster['id'] = roster['id'].astype(str)
103
- roster_dict = roster.set_index('id').to_dict()['shootsCatches']
104
- names_dict = roster.set_index('id').to_dict()['fullName']
102
+ roster['player_id'] = roster['player_id'].astype(str)
103
+ roster_dict = roster.set_index('player_id').to_dict()['handedness']
104
+ names_dict = roster.set_index('player_id').to_dict()['player_name']
105
105
 
106
106
  #Add player names
107
107
  for i in range(3):
@@ -11,6 +11,7 @@ from wsba_hockey.tools.scraping import *
11
11
  from wsba_hockey.tools.xg_model import *
12
12
  from wsba_hockey.tools.agg import *
13
13
  from wsba_hockey.tools.plotting import *
14
+ from wsba_hockey.tools.columns import col_map
14
15
 
15
16
  ### WSBA HOCKEY ###
16
17
  ## Provided below are all integral functions in the WSBA Hockey Python package. ##
@@ -114,8 +115,19 @@ SCHEDULE_PATH = os.path.join(DIR,'tools\\schedule\\schedule.csv')
114
115
  INFO_PATH = os.path.join(DIR,'tools\\teaminfo\\nhl_teaminfo.csv')
115
116
  DEFAULT_ROSTER = os.path.join(DIR,'tools\\rosters\\nhl_rosters.csv')
116
117
 
118
+ #Load column names for standardization
119
+ COL_MAP = col_map()
120
+
121
+ DRAFT_CAT = {
122
+ 0: 'All Prospects',
123
+ 1: 'North American Skaters',
124
+ 2: 'International Skater',
125
+ 3: 'North American Goalies',
126
+ 4: 'International Goalies'
127
+ }
128
+
117
129
  ## SCRAPE FUNCTIONS ##
118
- def nhl_scrape_game(game_ids:list[int], split_shifts:bool = False, remove:list[str] = [], verbose:bool = False, sources:bool = False, errors:bool = False):
130
+ def nhl_scrape_game(game_ids:int | list[int], split_shifts:bool = False, remove:list[str] = [], verbose:bool = False, sources:bool = False, errors:bool = False):
119
131
  """
120
132
  Given a set of game_ids (NHL API), return complete play-by-play information as requested.
121
133
 
@@ -319,24 +331,17 @@ def nhl_scrape_schedule(season:int, start:str = '', end:str = ''):
319
331
  if gameWeek.empty:
320
332
  game.append(gameWeek)
321
333
  else:
322
- gameWeek['date'] = get['gameWeek'][0]['date']
323
-
324
- gameWeek['season_type'] = gameWeek['gameType']
325
- gameWeek['away_team_abbr'] = gameWeek['awayTeam.abbrev']
326
- gameWeek['home_team_abbr'] = gameWeek['homeTeam.abbrev']
327
- gameWeek['game_title'] = gameWeek['away_team_abbr'] + " @ " + gameWeek['home_team_abbr'] + " - " + gameWeek['date']
328
- gameWeek['estStartTime'] = pd.to_datetime(gameWeek['startTimeUTC']).dt.tz_convert('US/Eastern').dt.strftime("%I:%M %p")
329
-
330
- front_col = ['id','season','date','season_type','game_title','away_team_abbr','home_team_abbr','estStartTime']
331
- gameWeek = gameWeek[front_col+[col for col in gameWeek.columns.to_list() if col not in front_col]]
334
+ gameWeek['game_date'] = get['gameWeek'][0]['date']
335
+ gameWeek['game_title'] = gameWeek['awayTeam.abbrev'] + " @ " + gameWeek['homeTeam.abbrev'] + " - " + gameWeek['game_date']
336
+ gameWeek['start_time_est'] = pd.to_datetime(gameWeek['startTimeUTC']).dt.tz_convert('US/Eastern').dt.strftime("%I:%M %p")
332
337
 
333
338
  game.append(gameWeek)
334
339
 
335
- #Concatenate all games
336
- df = pd.concat(game)
340
+ #Concatenate all games and standardize column naming
341
+ df = pd.concat(game).rename(columns=COL_MAP['schedule'],errors='ignore')
337
342
 
338
343
  #Return: specificed schedule data
339
- return df
344
+ return df[[col for col in COL_MAP['schedule'].values() if col in df.columns]]
340
345
 
341
346
  def nhl_scrape_season(season:int, split_shifts:bool = False, season_types:list[int] = [2,3], remove:list[str] = [], start:str = '', end:str = '', local:bool=False, local_path:str = SCHEDULE_PATH, verbose:bool = False, sources:bool = False, errors:bool = False):
342
347
  """
@@ -399,13 +404,13 @@ def nhl_scrape_season(season:int, split_shifts:bool = False, season_types:list[i
399
404
 
400
405
  load = load.loc[(load['season']==season)&
401
406
  (load['season_type'].isin(season_types))&
402
- (load['date']>=start)&(load['date']<=end)]
407
+ (load['game_date']>=start)&(load['game_date']<=end)]
403
408
 
404
- game_ids = load['id'].to_list()
409
+ game_ids = load['game_id'].to_list()
405
410
  else:
406
411
  load = nhl_scrape_schedule(season,start,end)
407
412
  load = load.loc[(load['season']==season)&(load['season_type'].isin(season_types))]
408
- game_ids = load['id'].to_list()
413
+ game_ids = load['game_id'].to_list()
409
414
 
410
415
  #If no games found, terminate the process
411
416
  if not game_ids:
@@ -440,10 +445,9 @@ def nhl_scrape_seasons_info(seasons:list[int] = []):
440
445
  A DataFrame containing the information for requested seasons.
441
446
  """
442
447
 
443
- #
444
- # param 'season' - list of seasons to include
445
-
446
448
  print(f'Scraping info for seasons: {seasons}')
449
+
450
+ #Load two different data sources: general season info and standings data related to season
447
451
  api = "https://api.nhle.com/stats/rest/en/season"
448
452
  info = "https://api-web.nhle.com/v1/standings-season"
449
453
  data = rs.get(api).json()['data']
@@ -452,12 +456,17 @@ def nhl_scrape_seasons_info(seasons:list[int] = []):
452
456
  df = pd.json_normalize(data)
453
457
  df_2 = pd.json_normalize(data_2)
454
458
 
455
- df = pd.merge(df,df_2,how='outer',on=['id'])
459
+ #Remove common columns
460
+ df_2 = df_2.drop(columns=['conferencesInUse', 'divisionsInUse', 'pointForOTlossInUse','rowInUse','tiesInUse','wildcardInUse'])
461
+
462
+ df = pd.merge(df,df_2,how='outer',on=['id']).rename(columns=COL_MAP['season_info'])
456
463
 
464
+ df = df[[col for col in COL_MAP['season_info'].values() if col in df.columns]]
465
+
457
466
  if len(seasons) > 0:
458
- return df.loc[df['id'].astype(str).isin(seasons)].sort_values(by=['id'])
467
+ return df.loc[df['season'].isin(seasons)].sort_values(by=['season'])
459
468
  else:
460
- return df.sort_values(by=['id'])
469
+ return df.sort_values(by=['season'])
461
470
 
462
471
  def nhl_scrape_standings(arg:int | list[int] | Literal['now'] = 'now', season_type:int = 2):
463
472
  """
@@ -526,8 +535,11 @@ def nhl_scrape_standings(arg:int | list[int] | Literal['now'] = 'now', season_ty
526
535
  data = rs.get(api).json()['standings']
527
536
  dfs.append(pd.json_normalize(data))
528
537
 
538
+ #Standardize columns
539
+ df = pd.concat(dfs).rename(columns=COL_MAP['standings'])
540
+
529
541
  #Return: standings data
530
- return pd.concat(dfs)
542
+ return df[[col for col in COL_MAP['standings'].values() if col in df.columns]]
531
543
 
532
544
  def nhl_scrape_roster(season: int):
533
545
  """
@@ -542,33 +554,40 @@ def nhl_scrape_roster(season: int):
542
554
  A DataFrame containing the rosters for all teams in the specified season.
543
555
  """
544
556
 
545
- print("Scrpaing rosters for the "+ season + "season...")
557
+ print(f'Scrpaing rosters for the {season} season...')
546
558
  teaminfo = pd.read_csv(info_path)
547
559
 
548
560
  rosts = []
549
- for team in list(teaminfo['Team']):
561
+ for team in teaminfo['team_abbr'].drop_duplicates():
550
562
  try:
551
- print("Scraping " + team + " roster...")
552
- api = "https://api-web.nhle.com/v1/roster/"+team+"/"+season
563
+ print(f'Scraping {team} roster...')
564
+ api = f'https://api-web.nhle.com/v1/roster/{team}/{season}'
553
565
 
554
566
  data = rs.get(api).json()
555
567
  forwards = pd.json_normalize(data['forwards'])
556
- forwards['headingPosition'] = "F"
568
+ forwards['heading_position'] = "F"
557
569
  dmen = pd.json_normalize(data['defensemen'])
558
- dmen['headingPosition'] = "D"
570
+ dmen['heading_position'] = "D"
559
571
  goalies = pd.json_normalize(data['goalies'])
560
- goalies['headingPosition'] = "G"
572
+ goalies['heading_position'] = "G"
561
573
 
562
574
  roster = pd.concat([forwards,dmen,goalies]).reset_index(drop=True)
563
- roster['fullName'] = (roster['firstName.default']+" "+roster['lastName.default']).str.upper()
575
+ roster['player_name'] = (roster['firstName.default']+" "+roster['lastName.default']).str.upper()
564
576
  roster['season'] = str(season)
565
577
  roster['team_abbr'] = team
566
578
 
567
579
  rosts.append(roster)
568
580
  except:
569
- print("No roster found for " + team + "...")
581
+ print(f'No roster found for {team}...')
582
+
583
+ #Combine rosters
584
+ df = pd.concat(rosts)
585
+
586
+ #Standardize columns
587
+ df = df.rename(columns=COL_MAP['roster'])
570
588
 
571
- return pd.concat(rosts)
589
+ #Return: roster data for provided season
590
+ return df[[col for col in COL_MAP['roster'].values() if col in df.columns]]
572
591
 
573
592
  def nhl_scrape_prospects(team:str):
574
593
  """
@@ -586,16 +605,21 @@ def nhl_scrape_prospects(team:str):
586
605
  api = f'https://api-web.nhle.com/v1/prospects/{team}'
587
606
 
588
607
  data = rs.get(api).json()
589
-
608
+
609
+ print(f'Scraping {team} prospects...')
610
+
590
611
  #Iterate through positions
591
612
  players = [pd.json_normalize(data[pos]) for pos in ['forwards','defensemen','goalies']]
592
613
 
593
614
  prospects = pd.concat(players)
594
615
  #Add name columns
595
- prospects['fullName'] = (prospects['firstName.default']+" "+prospects['lastName.default']).str.upper()
616
+ prospects['player_name'] = (prospects['firstName.default']+" "+prospects['lastName.default']).str.upper()
596
617
 
618
+ #Standardize columns
619
+ prospects = prospects.rename(columns=COL_MAP['prospects'])
620
+
597
621
  #Return: team prospects
598
- return prospects
622
+ return prospects[[col for col in COL_MAP['prospects'].values() if col in prospects.columns]]
599
623
 
600
624
  def nhl_scrape_team_info(country:bool = False):
601
625
  """
@@ -620,9 +644,13 @@ def nhl_scrape_team_info(country:bool = False):
620
644
  data['logo_light'] = 'https://assets.nhle.com/logos/nhl/svg/'+data['triCode']+'_light.svg'
621
645
  data['logo_dark'] = 'https://assets.nhle.com/logos/nhl/svg/'+data['triCode']+'_dark.svg'
622
646
 
623
- return data.sort_values(by=(['country3Code','countryCode','iocCode','countryName'] if country else ['fullName','triCode','id']))
647
+ #Standardize columns
648
+ data = data.rename(columns=COL_MAP['team_info'])
624
649
 
625
- def nhl_scrape_player_data(player_ids:list[int]):
650
+ #Return: team or country info
651
+ return data[[col for col in COL_MAP['team_info'].values() if col in data.columns]].sort_values(by=(['country_abbr','country_name'] if country else ['team_abbr','team_name']))
652
+
653
+ def nhl_scrape_player_info(player_ids:list[int]):
626
654
  """
627
655
  Returns player data for specified players.
628
656
 
@@ -635,6 +663,7 @@ def nhl_scrape_player_data(player_ids:list[int]):
635
663
  A DataFrame containing player data for specified players.
636
664
  """
637
665
 
666
+ print(f'Retreiving player information for {player_ids}...')
638
667
  infos = []
639
668
  for player_id in player_ids:
640
669
  player_id = int(player_id)
@@ -642,7 +671,7 @@ def nhl_scrape_player_data(player_ids:list[int]):
642
671
 
643
672
  data = pd.json_normalize(rs.get(api).json())
644
673
  #Add name column
645
- data['fullName'] = (data['firstName.default'] + " " + data['lastName.default']).str.upper()
674
+ data['player_name'] = (data['firstName.default'] + " " + data['lastName.default']).str.upper()
646
675
 
647
676
  #Append
648
677
  infos.append(data)
@@ -650,8 +679,11 @@ def nhl_scrape_player_data(player_ids:list[int]):
650
679
  if infos:
651
680
  df = pd.concat(infos)
652
681
 
682
+ #Standardize columns
683
+ df = df.rename(columns=COL_MAP['player_info'])
684
+
653
685
  #Return: player data
654
- return df
686
+ return df[[col for col in COL_MAP['player_info'].values() if col in df.columns]]
655
687
  else:
656
688
  return pd.DataFrame()
657
689
 
@@ -666,8 +698,8 @@ def nhl_scrape_draft_rankings(arg:str | Literal['now'] = 'now', category:int = 0
666
698
 
667
699
  - Category 1 is North American Skaters.
668
700
  - Category 2 is International Skaters.
669
- - Category 3 is North American Goalie.
670
- - Category 4 is International Goalie
701
+ - Category 3 is North American Goalies.
702
+ - Category 4 is International Goalies
671
703
 
672
704
  Default is 0 (all prospects).
673
705
  Returns:
@@ -675,15 +707,26 @@ def nhl_scrape_draft_rankings(arg:str | Literal['now'] = 'now', category:int = 0
675
707
  A DataFrame containing draft rankings.
676
708
  """
677
709
 
710
+ print(f'Scraping draft rankings for {arg}...\nCategory: {DRAFT_CAT[category]}...')
711
+
678
712
  #Player category only applies when requesting a specific season
679
713
  api = f"https://api-web.nhle.com/v1/draft/rankings/{arg}/{category}" if category > 0 else f"https://api-web.nhle.com/v1/draft/rankings/{arg}"
680
714
  data = pd.json_normalize(rs.get(api).json()['rankings'])
681
715
 
682
716
  #Add player name columns
683
- data['fullName'] = (data['firstName']+" "+data['lastName']).str.upper()
717
+ data['player_name'] = (data['firstName']+" "+data['lastName']).str.upper()
718
+
719
+ #Fix positions
720
+ data['positionCode'] = data['positionCode'].replace({
721
+ 'LW':'L',
722
+ 'RW':'R'
723
+ })
724
+
725
+ #Standardize columns
726
+ data = data.rename(columns=COL_MAP['draft_rankings'])
684
727
 
685
728
  #Return: prospect rankings
686
- return data
729
+ return data[[col for col in COL_MAP['draft_rankings'].values() if col in data.columns]]
687
730
 
688
731
  def nhl_scrape_game_info(game_ids:list[int]):
689
732
  """
@@ -698,6 +741,9 @@ def nhl_scrape_game_info(game_ids:list[int]):
698
741
  An DataFrame containing information for each game.
699
742
  """
700
743
 
744
+ #Wrap game_id in a list if only a single game_id is provided
745
+ game_ids = [game_ids] if type(game_ids) != list else game_ids
746
+
701
747
  print(f'Finding game information for games: {game_ids}')
702
748
 
703
749
  link = 'https://api-web.nhle.com/v1/gamecenter'
@@ -706,18 +752,15 @@ def nhl_scrape_game_info(game_ids:list[int]):
706
752
  df = pd.concat([pd.json_normalize(rs.get(f'{link}/{game_id}/landing').json()) for game_id in game_ids])
707
753
 
708
754
  #Add extra info
709
- df['date'] = df['gameDate']
710
- df['season_type'] = df['gameType']
711
- df['away_team_abbr'] = df['awayTeam.abbrev']
712
- df['home_team_abbr'] = df['homeTeam.abbrev']
713
- df['game_title'] = df['away_team_abbr'] + " @ " + df['home_team_abbr'] + " - " + df['date']
714
- df['estStartTime'] = pd.to_datetime(df['startTimeUTC']).dt.tz_convert('US/Eastern').dt.strftime("%I:%M %p")
755
+ df['game_date'] = df['gameDate']
756
+ df['game_title'] = df['awayTeam.abbrev'] + " @ " + df['homeTeam.abbrev'] + " - " + df['game_date']
757
+ df['start_time_est'] = pd.to_datetime(df['startTimeUTC']).dt.tz_convert('US/Eastern').dt.strftime("%I:%M %p")
715
758
 
716
- front_col = ['id','season','date','season_type','game_title','away_team_abbr','home_team_abbr','estStartTime']
717
- df = df[front_col+[col for col in df.columns.to_list() if col not in front_col]]
759
+ #Standardize columns
760
+ df = df.rename(columns=COL_MAP['schedule'])
718
761
 
719
762
  #Return: game information
720
- return df
763
+ return df[[col for col in COL_MAP['schedule'].values() if col in df.columns]]
721
764
 
722
765
 
723
766
  def nhl_apply_xG(pbp: pd.DataFrame):
@@ -1172,23 +1215,23 @@ def nhl_calculate_stats(pbp:pd.DataFrame, type:Literal['skater','goalie','team']
1172
1215
 
1173
1216
  #Import rosters and player info
1174
1217
  rosters = pd.read_csv(roster_path)
1175
- names = rosters[['id','fullName',
1176
- 'headshot','positionCode','shootsCatches',
1177
- 'heightInInches','weightInPounds',
1178
- 'birthDate','birthCountry']].drop_duplicates(subset=['id','fullName'],keep='last')
1218
+ names = rosters[['player_id','player_name',
1219
+ 'headshot','position','handedness',
1220
+ 'height_in','weight_lbs',
1221
+ 'birth_date','birth_country']].drop_duplicates(subset=['player_id','player_name'],keep='last')
1179
1222
 
1180
1223
  #Add names
1181
- complete = pd.merge(complete,names,how='left',left_on='ID',right_on='id')
1224
+ complete = pd.merge(complete,names,how='left',left_on='ID',right_on='player_id')
1182
1225
 
1183
1226
  #Rename if there are no missing names
1184
- complete = complete.rename(columns={'fullName':'Goalie',
1227
+ complete = complete.rename(columns={'player_name':'Goalie',
1185
1228
  'headshot':'Headshot',
1186
- 'positionCode':'Position',
1187
- 'shootsCatches':'Handedness',
1188
- 'heightInInches':'Height (in)',
1189
- 'weightInPounds':'Weight (lbs)',
1190
- 'birthDate':'Birthday',
1191
- 'birthCountry':'Nationality'})
1229
+ 'position':'Position',
1230
+ 'handedness':'Handedness',
1231
+ 'height_in':'Height (in)',
1232
+ 'weight_lbs':'Weight (lbs)',
1233
+ 'birth_date':'Birthday',
1234
+ 'birth_country':'Nationality'})
1192
1235
 
1193
1236
  #WSBA
1194
1237
  complete['WSBA'] = complete['ID'].astype(str).str.replace('.0','')+complete['Team']+complete['Season'].astype(str)
@@ -1300,23 +1343,23 @@ def nhl_calculate_stats(pbp:pd.DataFrame, type:Literal['skater','goalie','team']
1300
1343
 
1301
1344
  #Import rosters and player info
1302
1345
  rosters = pd.read_csv(roster_path)
1303
- names = rosters[['id','fullName',
1304
- 'headshot','positionCode','shootsCatches',
1305
- 'heightInInches','weightInPounds',
1306
- 'birthDate','birthCountry']].drop_duplicates(subset=['id','fullName'],keep='last')
1346
+ names = rosters[['player_id','player_name',
1347
+ 'headshot','position','handedness',
1348
+ 'height_in','weight_lbs',
1349
+ 'birth_date','birth_country']].drop_duplicates(subset=['player_id','player_name'],keep='last')
1307
1350
 
1308
1351
  #Add names
1309
- complete = pd.merge(complete,names,how='left',left_on='ID',right_on='id')
1352
+ complete = pd.merge(complete,names,how='left',left_on='ID',right_on='player_id')
1310
1353
 
1311
1354
  #Rename if there are no missing names
1312
- complete = complete.rename(columns={'fullName':'Player',
1355
+ complete = complete.rename(columns={'player_name':'Player',
1313
1356
  'headshot':'Headshot',
1314
- 'positionCode':'Position',
1315
- 'shootsCatches':'Handedness',
1316
- 'heightInInches':'Height (in)',
1317
- 'weightInPounds':'Weight (lbs)',
1318
- 'birthDate':'Birthday',
1319
- 'birthCountry':'Nationality'})
1357
+ 'position':'Position',
1358
+ 'handedness':'Handedness',
1359
+ 'height_in':'Height (in)',
1360
+ 'weight_lbs':'Weight (lbs)',
1361
+ 'birth_date':'Birthday',
1362
+ 'birth_country':'Nationality'})
1320
1363
 
1321
1364
  #Set TOI to minute
1322
1365
  complete['TOI'] = complete['TOI']/60
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: wsba_hockey
3
- Version: 1.2.2
3
+ Version: 1.2.3
4
4
  Summary: WeakSide Breakout's complete Python package of access to hockey data, primarily including the scraping of National Hockey League schedule, play-by-play, and shifts information.
5
5
  Author-email: Owen Singh <owenbksingh@gmail.com>
6
6
  Project-URL: Homepage, https://github.com/owensingh38/wsba_hockey/
@@ -9,6 +9,7 @@ src/wsba_hockey.egg-info/dependency_links.txt
9
9
  src/wsba_hockey.egg-info/top_level.txt
10
10
  src/wsba_hockey/tools/__init__.py
11
11
  src/wsba_hockey/tools/agg.py
12
+ src/wsba_hockey/tools/columns.py
12
13
  src/wsba_hockey/tools/game_pred.py
13
14
  src/wsba_hockey/tools/plotting.py
14
15
  src/wsba_hockey/tools/scraping.py
File without changes
File without changes
File without changes
File without changes