wsba-hockey 1.2.3__tar.gz → 1.2.5__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {wsba_hockey-1.2.3/src/wsba_hockey.egg-info → wsba_hockey-1.2.5}/PKG-INFO +1 -1
- {wsba_hockey-1.2.3 → wsba_hockey-1.2.5}/pyproject.toml +1 -1
- {wsba_hockey-1.2.3 → wsba_hockey-1.2.5}/src/wsba_hockey/tools/columns.py +37 -2
- {wsba_hockey-1.2.3 → wsba_hockey-1.2.5}/src/wsba_hockey/wsba_main.py +50 -24
- {wsba_hockey-1.2.3 → wsba_hockey-1.2.5/src/wsba_hockey.egg-info}/PKG-INFO +1 -1
- {wsba_hockey-1.2.3 → wsba_hockey-1.2.5}/LICENSE +0 -0
- {wsba_hockey-1.2.3 → wsba_hockey-1.2.5}/README.md +0 -0
- {wsba_hockey-1.2.3 → wsba_hockey-1.2.5}/setup.cfg +0 -0
- {wsba_hockey-1.2.3 → wsba_hockey-1.2.5}/src/wsba_hockey/__init__.py +0 -0
- {wsba_hockey-1.2.3 → wsba_hockey-1.2.5}/src/wsba_hockey/tools/__init__.py +0 -0
- {wsba_hockey-1.2.3 → wsba_hockey-1.2.5}/src/wsba_hockey/tools/agg.py +0 -0
- {wsba_hockey-1.2.3 → wsba_hockey-1.2.5}/src/wsba_hockey/tools/archive/old_scraping.py +0 -0
- {wsba_hockey-1.2.3 → wsba_hockey-1.2.5}/src/wsba_hockey/tools/game_pred.py +0 -0
- {wsba_hockey-1.2.3 → wsba_hockey-1.2.5}/src/wsba_hockey/tools/plotting.py +0 -0
- {wsba_hockey-1.2.3 → wsba_hockey-1.2.5}/src/wsba_hockey/tools/scraping.py +0 -0
- {wsba_hockey-1.2.3 → wsba_hockey-1.2.5}/src/wsba_hockey/tools/utils/__init__.py +0 -0
- {wsba_hockey-1.2.3 → wsba_hockey-1.2.5}/src/wsba_hockey/tools/utils/shared.py +0 -0
- {wsba_hockey-1.2.3 → wsba_hockey-1.2.5}/src/wsba_hockey/tools/xg_model.py +0 -0
- {wsba_hockey-1.2.3 → wsba_hockey-1.2.5}/src/wsba_hockey.egg-info/SOURCES.txt +0 -0
- {wsba_hockey-1.2.3 → wsba_hockey-1.2.5}/src/wsba_hockey.egg-info/dependency_links.txt +0 -0
- {wsba_hockey-1.2.3 → wsba_hockey-1.2.5}/src/wsba_hockey.egg-info/top_level.txt +0 -0
- {wsba_hockey-1.2.3 → wsba_hockey-1.2.5}/tests/tests.py +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: wsba_hockey
|
|
3
|
-
Version: 1.2.3
|
|
3
|
+
Version: 1.2.5
|
|
4
4
|
Summary: WeakSide Breakout's complete Python package of access to hockey data, primairly including the scraping of National Hockey League schedule, play-by-play, and shifts information.
|
|
5
5
|
Author-email: Owen Singh <owenbksingh@gmail.com>
|
|
6
6
|
Project-URL: Homepage, https://github.com/owensingh38/wsba_hockey/
|
|
@@ -4,7 +4,9 @@
|
|
|
4
4
|
def col_map():
|
|
5
5
|
return {
|
|
6
6
|
'schedule':{
|
|
7
|
+
'season':'season',
|
|
7
8
|
'id':'game_id',
|
|
9
|
+
'game_title':'game_title',
|
|
8
10
|
'game_date':'game_date',
|
|
9
11
|
'gameType':'season_type',
|
|
10
12
|
'neutralSite':'neutral_site',
|
|
@@ -36,6 +38,7 @@ def col_map():
|
|
|
36
38
|
'periodDescriptor.maxRegulationPeriods':'period_max_regulation',
|
|
37
39
|
'periodDescriptor.periodType':'period_type',
|
|
38
40
|
'gameOutcome.lastPeriodType':'period_type_last',
|
|
41
|
+
'gameOutcome.otPeriods':'ot_periods',
|
|
39
42
|
'seriesUrl':'series_url',
|
|
40
43
|
'seriesStatus.round':'series_round',
|
|
41
44
|
'seriesStatus.seriesAbbrev':'series_abbr',
|
|
@@ -46,7 +49,11 @@ def col_map():
|
|
|
46
49
|
'seriesStatus.topSeedTeamAbbrev':'top_seed_team_abbr',
|
|
47
50
|
'seriesStatus.bottomSeedTeamAbbrev':'bottom_seed_team_abbr',
|
|
48
51
|
'seriesStatus.topSeedWins':'top_seed_wins',
|
|
49
|
-
'seriesStatus.bottomSeedWins':'bottom_seed_wins'
|
|
52
|
+
'seriesStatus.bottomSeedWins':'bottom_seed_wins',
|
|
53
|
+
'clock.timeRemaining':'period_time_remaining',
|
|
54
|
+
'clock.secondsRemaining':'period_seconds_remaining',
|
|
55
|
+
'clock.running':'period_clock_running',
|
|
56
|
+
'clock.inIntermission':'game_in_intermission'
|
|
50
57
|
},
|
|
51
58
|
'season_info':{
|
|
52
59
|
'id':'season',
|
|
@@ -155,7 +162,35 @@ def col_map():
|
|
|
155
162
|
"placeName.default": "place_name",
|
|
156
163
|
"teamName.default": "team_name",
|
|
157
164
|
"teamCommonName.default": "team_common_name",
|
|
158
|
-
"teamAbbrev.default": "team_abbr"
|
|
165
|
+
"teamAbbrev.default": "team_abbr",
|
|
166
|
+
"seriesUrl": "series_url",
|
|
167
|
+
"seriesTitle": "series_title",
|
|
168
|
+
"seriesAbbrev": "series_abbrev",
|
|
169
|
+
"seriesLetter": "series_letter",
|
|
170
|
+
"playoffRound": "playoff_round",
|
|
171
|
+
"topSeedRank": "top_seed_rank",
|
|
172
|
+
"topSeedRankAbbrev": "top_seed_rank_abbr",
|
|
173
|
+
"topSeedWins": "top_seed_wins",
|
|
174
|
+
"bottomSeedRank": "bottom_seed_rank",
|
|
175
|
+
"bottomSeedRankAbbrev": "bottom_seed_rank_abbr",
|
|
176
|
+
"bottomSeedWins": "bottom_seed_wins",
|
|
177
|
+
"winningTeamId": "winning_team_id",
|
|
178
|
+
"losingTeamId": "losing_team_id",
|
|
179
|
+
"topSeedTeam.id": "top_seed_team_id",
|
|
180
|
+
"topSeedTeam.abbrev": "top_seed_team_abbr",
|
|
181
|
+
"topSeedTeam.name.default": "top_seed_team_name",
|
|
182
|
+
"topSeedTeam.commonName.default": "top_seed_team_common_name",
|
|
183
|
+
"topSeedTeam.placeNameWithPreposition.default": "top_seed_team_place_name",
|
|
184
|
+
"topSeedTeam.logo": "top_seed_team_logo",
|
|
185
|
+
"topSeedTeam.darkLogo": "top_seed_team_dark_logo",
|
|
186
|
+
"bottomSeedTeam.id": "bottom_seed_team_id",
|
|
187
|
+
"bottomSeedTeam.abbrev": "bottom_seed_team_abbr",
|
|
188
|
+
"bottomSeedTeam.name.default": "bottom_seed_team_name",
|
|
189
|
+
"bottomSeedTeam.commonName.default": "bottom_seed_team_common_name",
|
|
190
|
+
"bottomSeedTeam.placeNameWithPreposition.default": "bottom_seed_team_place_name",
|
|
191
|
+
"bottomSeedTeam.logo": "bottom_seed_team_logo",
|
|
192
|
+
"bottomSeedTeam.darkLogo": "bottom_seed_team_dark_logo",
|
|
193
|
+
"seriesLogo": "series_logo",
|
|
159
194
|
},
|
|
160
195
|
'roster':{
|
|
161
196
|
"id": "player_id",
|
|
@@ -85,7 +85,7 @@ CONVERT_TEAM_ABBR = {'L.A':'LAK',
|
|
|
85
85
|
'T.B':'TBL',
|
|
86
86
|
'PHX':'ARI'}
|
|
87
87
|
|
|
88
|
-
PER_SIXTY = ['Fi','xGi','Gi','A1','A2','P1','P','Si','OZF','NZF','DZF','FF','FA','xGF','xGA','GF','GA','SF','SA','CF','CA','HF','HA','Give','Take','Penl','Penl2','Penl5','Draw','Block','GSAx']
|
|
88
|
+
PER_SIXTY = ['Fi','xGi','Gi','A1','A2','P1','P','Si','OZF','NZF','DZF','FF','FA','xGF','xGA','GF','GA','SF','SA','CF','CA','HF','HA','Give','Take','Penl','Penl2','Penl5','Draw','PIM','Block','GSAx']
|
|
89
89
|
|
|
90
90
|
#Some games in the API are specifically known to cause errors in scraping.
|
|
91
91
|
#This list is updated as frequently as necessary
|
|
@@ -289,7 +289,7 @@ def nhl_scrape_schedule(season:int, start:str = '', end:str = ''):
|
|
|
289
289
|
A DataFrame containing the schedule data for the specified season and date range.
|
|
290
290
|
"""
|
|
291
291
|
|
|
292
|
-
api = "https://api-web.nhle.com/v1/
|
|
292
|
+
api = "https://api-web.nhle.com/v1/score/"
|
|
293
293
|
|
|
294
294
|
#If either start or end are blank then find start and endpoints for specified season
|
|
295
295
|
if start == '' or end == '':
|
|
@@ -325,7 +325,7 @@ def nhl_scrape_schedule(season:int, start:str = '', end:str = ''):
|
|
|
325
325
|
print(f'Scraping games on {str(inc)[:10]}...')
|
|
326
326
|
|
|
327
327
|
get = rs.get(f'{api}{str(inc)[:10]}').json()
|
|
328
|
-
gameWeek = pd.json_normalize(
|
|
328
|
+
gameWeek = pd.json_normalize(get['games']).drop(columns=['goals'],errors='ignore')
|
|
329
329
|
|
|
330
330
|
#Return nothing if there's nothing
|
|
331
331
|
if gameWeek.empty:
|
|
@@ -382,35 +382,54 @@ def nhl_scrape_season(season:int, split_shifts:bool = False, season_types:list[i
|
|
|
382
382
|
"""
|
|
383
383
|
|
|
384
384
|
#Determine whether to use schedule data in repository or to scrape
|
|
385
|
+
local_failed = False
|
|
386
|
+
|
|
385
387
|
if local:
|
|
386
|
-
|
|
387
|
-
|
|
388
|
-
|
|
389
|
-
|
|
390
|
-
|
|
391
|
-
|
|
392
|
-
|
|
393
|
-
|
|
394
|
-
|
|
388
|
+
try:
|
|
389
|
+
load = pd.read_csv(local_path)
|
|
390
|
+
load['game_date'] = pd.to_datetime(load['game_date'])
|
|
391
|
+
|
|
392
|
+
if start == '' or end == '':
|
|
393
|
+
season_data = rs.get('https://api.nhle.com/stats/rest/en/season').json()['data']
|
|
394
|
+
season_data = [s for s in season_data if s['id'] == season][0]
|
|
395
|
+
|
|
396
|
+
season_start = season_data['startDate'][0:10]
|
|
397
|
+
season_end = season_data['endDate'][0:10]
|
|
398
|
+
|
|
399
|
+
else:
|
|
400
|
+
season_start = f'{(str(season)[0:4] if int(start[0:2])>=9 else str(season)[4:8])}-{start[0:2]}-{start[3:5]}'
|
|
401
|
+
season_end = f'{(str(season)[0:4] if int(end[0:2])>=9 else str(season)[4:8])}-{end[0:2]}-{end[3:5]}'
|
|
402
|
+
|
|
395
403
|
form = '%Y-%m-%d'
|
|
396
404
|
|
|
397
405
|
#Create datetime values from dates
|
|
398
|
-
|
|
399
|
-
|
|
400
|
-
|
|
401
|
-
|
|
402
|
-
|
|
403
|
-
|
|
406
|
+
start_date = datetime.strptime(season_start,form)
|
|
407
|
+
end_date = datetime.strptime(season_end,form)
|
|
408
|
+
|
|
409
|
+
load = load.loc[(load['season']==season)&
|
|
410
|
+
(load['season_type'].isin(season_types))&
|
|
411
|
+
(load['game_date']>=start_date)&(load['game_date']<=end_date)&
|
|
412
|
+
(load['game_schedule_state']=='OK')&
|
|
413
|
+
(load['game_state']!='FUT')
|
|
414
|
+
]
|
|
404
415
|
|
|
416
|
+
game_ids = load['game_id'].to_list()
|
|
417
|
+
except KeyError:
|
|
418
|
+
#If loading games locally fails then force a scrape
|
|
419
|
+
local_failed = True
|
|
420
|
+
print('Loading games locally has failed. Loading schedule data with a scrape...')
|
|
421
|
+
else:
|
|
422
|
+
local_failed = True
|
|
423
|
+
|
|
424
|
+
if local_failed:
|
|
425
|
+
load = nhl_scrape_schedule(season,start,end)
|
|
405
426
|
load = load.loc[(load['season']==season)&
|
|
406
427
|
(load['season_type'].isin(season_types))&
|
|
407
|
-
(load['
|
|
428
|
+
(load['game_schedule_state']=='OK')&
|
|
429
|
+
(load['game_state']!='FUT')
|
|
430
|
+
]
|
|
408
431
|
|
|
409
432
|
game_ids = load['game_id'].to_list()
|
|
410
|
-
else:
|
|
411
|
-
load = nhl_scrape_schedule(season,start,end)
|
|
412
|
-
load = load.loc[(load['season']==season)&(load['season_type'].isin(season_types))]
|
|
413
|
-
game_ids = load['game_id'].to_list()
|
|
414
433
|
|
|
415
434
|
#If no games found, terminate the process
|
|
416
435
|
if not game_ids:
|
|
@@ -504,8 +523,11 @@ def nhl_scrape_standings(arg:int | list[int] | Literal['now'] = 'now', season_ty
|
|
|
504
523
|
data = rs.get(api).json()['series']
|
|
505
524
|
dfs.append(pd.json_normalize(data))
|
|
506
525
|
|
|
526
|
+
#Combine and standardize columns
|
|
527
|
+
df = pd.concat(dfs).rename(columns=COL_MAP['standings'])
|
|
528
|
+
|
|
507
529
|
#Return: playoff bracket
|
|
508
|
-
return
|
|
530
|
+
return df[[col for col in COL_MAP['standings'].values() if col in df.columns]]
|
|
509
531
|
|
|
510
532
|
else:
|
|
511
533
|
if arg == "now":
|
|
@@ -664,6 +686,10 @@ def nhl_scrape_player_info(player_ids:list[int]):
|
|
|
664
686
|
"""
|
|
665
687
|
|
|
666
688
|
print(f'Retreiving player information for {player_ids}...')
|
|
689
|
+
|
|
690
|
+
#Wrap game_id in a list if only a single game_id is provided
|
|
691
|
+
player_ids = [player_ids] if type(player_ids) != list else player_ids
|
|
692
|
+
|
|
667
693
|
infos = []
|
|
668
694
|
for player_id in player_ids:
|
|
669
695
|
player_id = int(player_id)
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: wsba_hockey
|
|
3
|
-
Version: 1.2.3
|
|
3
|
+
Version: 1.2.5
|
|
4
4
|
Summary: WeakSide Breakout's complete Python package of access to hockey data, primairly including the scraping of National Hockey League schedule, play-by-play, and shifts information.
|
|
5
5
|
Author-email: Owen Singh <owenbksingh@gmail.com>
|
|
6
6
|
Project-URL: Homepage, https://github.com/owensingh38/wsba_hockey/
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|