wsba-hockey 1.1.8__py3-none-any.whl → 1.2.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (145) hide show
  1. wsba_hockey/__init__.py +22 -1
  2. wsba_hockey/tools/scraping.py +166 -190
  3. wsba_hockey/tools/utils/__init__.py +0 -1
  4. wsba_hockey/tools/utils/shared.py +14 -389
  5. wsba_hockey/tools/xg_model.py +6 -1
  6. wsba_hockey/wsba_main.py +330 -20
  7. {wsba_hockey-1.1.8.dist-info → wsba_hockey-1.2.0.dist-info}/METADATA +16 -15
  8. wsba_hockey-1.2.0.dist-info/RECORD +15 -0
  9. wsba_hockey/api/api/index.py +0 -162
  10. wsba_hockey/data_pipelines.py +0 -247
  11. wsba_hockey/evidence/weakside-breakout/node_modules/duckdb/vendor.py +0 -146
  12. wsba_hockey/evidence/weakside-breakout/node_modules/flatted/python/flatted.py +0 -149
  13. wsba_hockey/evidence/weakside-breakout/node_modules/flatted/python/test.py +0 -63
  14. wsba_hockey/evidence/weakside-breakout/node_modules/node-gyp/gyp/gyp_main.py +0 -45
  15. wsba_hockey/evidence/weakside-breakout/node_modules/node-gyp/gyp/pylib/gyp/MSVSNew.py +0 -367
  16. wsba_hockey/evidence/weakside-breakout/node_modules/node-gyp/gyp/pylib/gyp/MSVSProject.py +0 -206
  17. wsba_hockey/evidence/weakside-breakout/node_modules/node-gyp/gyp/pylib/gyp/MSVSSettings.py +0 -1270
  18. wsba_hockey/evidence/weakside-breakout/node_modules/node-gyp/gyp/pylib/gyp/MSVSSettings_test.py +0 -1547
  19. wsba_hockey/evidence/weakside-breakout/node_modules/node-gyp/gyp/pylib/gyp/MSVSToolFile.py +0 -59
  20. wsba_hockey/evidence/weakside-breakout/node_modules/node-gyp/gyp/pylib/gyp/MSVSUserFile.py +0 -153
  21. wsba_hockey/evidence/weakside-breakout/node_modules/node-gyp/gyp/pylib/gyp/MSVSUtil.py +0 -271
  22. wsba_hockey/evidence/weakside-breakout/node_modules/node-gyp/gyp/pylib/gyp/MSVSVersion.py +0 -574
  23. wsba_hockey/evidence/weakside-breakout/node_modules/node-gyp/gyp/pylib/gyp/__init__.py +0 -690
  24. wsba_hockey/evidence/weakside-breakout/node_modules/node-gyp/gyp/pylib/gyp/common.py +0 -661
  25. wsba_hockey/evidence/weakside-breakout/node_modules/node-gyp/gyp/pylib/gyp/common_test.py +0 -78
  26. wsba_hockey/evidence/weakside-breakout/node_modules/node-gyp/gyp/pylib/gyp/easy_xml.py +0 -165
  27. wsba_hockey/evidence/weakside-breakout/node_modules/node-gyp/gyp/pylib/gyp/easy_xml_test.py +0 -109
  28. wsba_hockey/evidence/weakside-breakout/node_modules/node-gyp/gyp/pylib/gyp/flock_tool.py +0 -55
  29. wsba_hockey/evidence/weakside-breakout/node_modules/node-gyp/gyp/pylib/gyp/generator/__init__.py +0 -0
  30. wsba_hockey/evidence/weakside-breakout/node_modules/node-gyp/gyp/pylib/gyp/generator/analyzer.py +0 -808
  31. wsba_hockey/evidence/weakside-breakout/node_modules/node-gyp/gyp/pylib/gyp/generator/android.py +0 -1173
  32. wsba_hockey/evidence/weakside-breakout/node_modules/node-gyp/gyp/pylib/gyp/generator/cmake.py +0 -1321
  33. wsba_hockey/evidence/weakside-breakout/node_modules/node-gyp/gyp/pylib/gyp/generator/compile_commands_json.py +0 -120
  34. wsba_hockey/evidence/weakside-breakout/node_modules/node-gyp/gyp/pylib/gyp/generator/dump_dependency_json.py +0 -103
  35. wsba_hockey/evidence/weakside-breakout/node_modules/node-gyp/gyp/pylib/gyp/generator/eclipse.py +0 -464
  36. wsba_hockey/evidence/weakside-breakout/node_modules/node-gyp/gyp/pylib/gyp/generator/gypd.py +0 -89
  37. wsba_hockey/evidence/weakside-breakout/node_modules/node-gyp/gyp/pylib/gyp/generator/gypsh.py +0 -58
  38. wsba_hockey/evidence/weakside-breakout/node_modules/node-gyp/gyp/pylib/gyp/generator/make.py +0 -2714
  39. wsba_hockey/evidence/weakside-breakout/node_modules/node-gyp/gyp/pylib/gyp/generator/msvs.py +0 -3981
  40. wsba_hockey/evidence/weakside-breakout/node_modules/node-gyp/gyp/pylib/gyp/generator/msvs_test.py +0 -44
  41. wsba_hockey/evidence/weakside-breakout/node_modules/node-gyp/gyp/pylib/gyp/generator/ninja.py +0 -2936
  42. wsba_hockey/evidence/weakside-breakout/node_modules/node-gyp/gyp/pylib/gyp/generator/ninja_test.py +0 -55
  43. wsba_hockey/evidence/weakside-breakout/node_modules/node-gyp/gyp/pylib/gyp/generator/xcode.py +0 -1394
  44. wsba_hockey/evidence/weakside-breakout/node_modules/node-gyp/gyp/pylib/gyp/generator/xcode_test.py +0 -25
  45. wsba_hockey/evidence/weakside-breakout/node_modules/node-gyp/gyp/pylib/gyp/input.py +0 -3130
  46. wsba_hockey/evidence/weakside-breakout/node_modules/node-gyp/gyp/pylib/gyp/input_test.py +0 -98
  47. wsba_hockey/evidence/weakside-breakout/node_modules/node-gyp/gyp/pylib/gyp/mac_tool.py +0 -771
  48. wsba_hockey/evidence/weakside-breakout/node_modules/node-gyp/gyp/pylib/gyp/msvs_emulation.py +0 -1271
  49. wsba_hockey/evidence/weakside-breakout/node_modules/node-gyp/gyp/pylib/gyp/ninja_syntax.py +0 -174
  50. wsba_hockey/evidence/weakside-breakout/node_modules/node-gyp/gyp/pylib/gyp/simple_copy.py +0 -61
  51. wsba_hockey/evidence/weakside-breakout/node_modules/node-gyp/gyp/pylib/gyp/win_tool.py +0 -374
  52. wsba_hockey/evidence/weakside-breakout/node_modules/node-gyp/gyp/pylib/gyp/xcode_emulation.py +0 -1939
  53. wsba_hockey/evidence/weakside-breakout/node_modules/node-gyp/gyp/pylib/gyp/xcode_ninja.py +0 -302
  54. wsba_hockey/evidence/weakside-breakout/node_modules/node-gyp/gyp/pylib/gyp/xcodeproj_file.py +0 -3197
  55. wsba_hockey/evidence/weakside-breakout/node_modules/node-gyp/gyp/pylib/gyp/xml_fix.py +0 -65
  56. wsba_hockey/evidence/weakside-breakout/node_modules/node-gyp/gyp/test_gyp.py +0 -261
  57. wsba_hockey/evidence/weakside-breakout/node_modules/node-gyp/gyp/tools/graphviz.py +0 -102
  58. wsba_hockey/evidence/weakside-breakout/node_modules/node-gyp/gyp/tools/pretty_gyp.py +0 -156
  59. wsba_hockey/evidence/weakside-breakout/node_modules/node-gyp/gyp/tools/pretty_sln.py +0 -181
  60. wsba_hockey/evidence/weakside-breakout/node_modules/node-gyp/gyp/tools/pretty_vcproj.py +0 -339
  61. wsba_hockey/evidence/weakside-breakout/node_modules/node-gyp/test/fixtures/test-charmap.py +0 -31
  62. wsba_hockey/evidence/weakside-breakout/node_modules/node-gyp/update-gyp.py +0 -64
  63. wsba_hockey/evidence/weakside-breakout/node_modules/sqlite3/node_modules/node-gyp/gyp/gyp_main.py +0 -45
  64. wsba_hockey/evidence/weakside-breakout/node_modules/sqlite3/node_modules/node-gyp/gyp/pylib/gyp/MSVSNew.py +0 -367
  65. wsba_hockey/evidence/weakside-breakout/node_modules/sqlite3/node_modules/node-gyp/gyp/pylib/gyp/MSVSProject.py +0 -206
  66. wsba_hockey/evidence/weakside-breakout/node_modules/sqlite3/node_modules/node-gyp/gyp/pylib/gyp/MSVSSettings.py +0 -1270
  67. wsba_hockey/evidence/weakside-breakout/node_modules/sqlite3/node_modules/node-gyp/gyp/pylib/gyp/MSVSSettings_test.py +0 -1547
  68. wsba_hockey/evidence/weakside-breakout/node_modules/sqlite3/node_modules/node-gyp/gyp/pylib/gyp/MSVSToolFile.py +0 -59
  69. wsba_hockey/evidence/weakside-breakout/node_modules/sqlite3/node_modules/node-gyp/gyp/pylib/gyp/MSVSUserFile.py +0 -153
  70. wsba_hockey/evidence/weakside-breakout/node_modules/sqlite3/node_modules/node-gyp/gyp/pylib/gyp/MSVSUtil.py +0 -271
  71. wsba_hockey/evidence/weakside-breakout/node_modules/sqlite3/node_modules/node-gyp/gyp/pylib/gyp/MSVSVersion.py +0 -574
  72. wsba_hockey/evidence/weakside-breakout/node_modules/sqlite3/node_modules/node-gyp/gyp/pylib/gyp/__init__.py +0 -666
  73. wsba_hockey/evidence/weakside-breakout/node_modules/sqlite3/node_modules/node-gyp/gyp/pylib/gyp/common.py +0 -654
  74. wsba_hockey/evidence/weakside-breakout/node_modules/sqlite3/node_modules/node-gyp/gyp/pylib/gyp/common_test.py +0 -78
  75. wsba_hockey/evidence/weakside-breakout/node_modules/sqlite3/node_modules/node-gyp/gyp/pylib/gyp/easy_xml.py +0 -165
  76. wsba_hockey/evidence/weakside-breakout/node_modules/sqlite3/node_modules/node-gyp/gyp/pylib/gyp/easy_xml_test.py +0 -109
  77. wsba_hockey/evidence/weakside-breakout/node_modules/sqlite3/node_modules/node-gyp/gyp/pylib/gyp/flock_tool.py +0 -55
  78. wsba_hockey/evidence/weakside-breakout/node_modules/sqlite3/node_modules/node-gyp/gyp/pylib/gyp/generator/__init__.py +0 -0
  79. wsba_hockey/evidence/weakside-breakout/node_modules/sqlite3/node_modules/node-gyp/gyp/pylib/gyp/generator/analyzer.py +0 -808
  80. wsba_hockey/evidence/weakside-breakout/node_modules/sqlite3/node_modules/node-gyp/gyp/pylib/gyp/generator/android.py +0 -1173
  81. wsba_hockey/evidence/weakside-breakout/node_modules/sqlite3/node_modules/node-gyp/gyp/pylib/gyp/generator/cmake.py +0 -1321
  82. wsba_hockey/evidence/weakside-breakout/node_modules/sqlite3/node_modules/node-gyp/gyp/pylib/gyp/generator/compile_commands_json.py +0 -120
  83. wsba_hockey/evidence/weakside-breakout/node_modules/sqlite3/node_modules/node-gyp/gyp/pylib/gyp/generator/dump_dependency_json.py +0 -103
  84. wsba_hockey/evidence/weakside-breakout/node_modules/sqlite3/node_modules/node-gyp/gyp/pylib/gyp/generator/eclipse.py +0 -464
  85. wsba_hockey/evidence/weakside-breakout/node_modules/sqlite3/node_modules/node-gyp/gyp/pylib/gyp/generator/gypd.py +0 -89
  86. wsba_hockey/evidence/weakside-breakout/node_modules/sqlite3/node_modules/node-gyp/gyp/pylib/gyp/generator/gypsh.py +0 -58
  87. wsba_hockey/evidence/weakside-breakout/node_modules/sqlite3/node_modules/node-gyp/gyp/pylib/gyp/generator/make.py +0 -2518
  88. wsba_hockey/evidence/weakside-breakout/node_modules/sqlite3/node_modules/node-gyp/gyp/pylib/gyp/generator/msvs.py +0 -3978
  89. wsba_hockey/evidence/weakside-breakout/node_modules/sqlite3/node_modules/node-gyp/gyp/pylib/gyp/generator/msvs_test.py +0 -44
  90. wsba_hockey/evidence/weakside-breakout/node_modules/sqlite3/node_modules/node-gyp/gyp/pylib/gyp/generator/ninja.py +0 -2936
  91. wsba_hockey/evidence/weakside-breakout/node_modules/sqlite3/node_modules/node-gyp/gyp/pylib/gyp/generator/ninja_test.py +0 -55
  92. wsba_hockey/evidence/weakside-breakout/node_modules/sqlite3/node_modules/node-gyp/gyp/pylib/gyp/generator/xcode.py +0 -1394
  93. wsba_hockey/evidence/weakside-breakout/node_modules/sqlite3/node_modules/node-gyp/gyp/pylib/gyp/generator/xcode_test.py +0 -25
  94. wsba_hockey/evidence/weakside-breakout/node_modules/sqlite3/node_modules/node-gyp/gyp/pylib/gyp/input.py +0 -3137
  95. wsba_hockey/evidence/weakside-breakout/node_modules/sqlite3/node_modules/node-gyp/gyp/pylib/gyp/input_test.py +0 -98
  96. wsba_hockey/evidence/weakside-breakout/node_modules/sqlite3/node_modules/node-gyp/gyp/pylib/gyp/mac_tool.py +0 -771
  97. wsba_hockey/evidence/weakside-breakout/node_modules/sqlite3/node_modules/node-gyp/gyp/pylib/gyp/msvs_emulation.py +0 -1271
  98. wsba_hockey/evidence/weakside-breakout/node_modules/sqlite3/node_modules/node-gyp/gyp/pylib/gyp/ninja_syntax.py +0 -174
  99. wsba_hockey/evidence/weakside-breakout/node_modules/sqlite3/node_modules/node-gyp/gyp/pylib/gyp/simple_copy.py +0 -61
  100. wsba_hockey/evidence/weakside-breakout/node_modules/sqlite3/node_modules/node-gyp/gyp/pylib/gyp/win_tool.py +0 -374
  101. wsba_hockey/evidence/weakside-breakout/node_modules/sqlite3/node_modules/node-gyp/gyp/pylib/gyp/xcode_emulation.py +0 -1939
  102. wsba_hockey/evidence/weakside-breakout/node_modules/sqlite3/node_modules/node-gyp/gyp/pylib/gyp/xcode_ninja.py +0 -302
  103. wsba_hockey/evidence/weakside-breakout/node_modules/sqlite3/node_modules/node-gyp/gyp/pylib/gyp/xcodeproj_file.py +0 -3197
  104. wsba_hockey/evidence/weakside-breakout/node_modules/sqlite3/node_modules/node-gyp/gyp/pylib/gyp/xml_fix.py +0 -65
  105. wsba_hockey/evidence/weakside-breakout/node_modules/sqlite3/node_modules/node-gyp/gyp/setup.py +0 -42
  106. wsba_hockey/evidence/weakside-breakout/node_modules/sqlite3/node_modules/node-gyp/gyp/test_gyp.py +0 -260
  107. wsba_hockey/evidence/weakside-breakout/node_modules/sqlite3/node_modules/node-gyp/gyp/tools/graphviz.py +0 -102
  108. wsba_hockey/evidence/weakside-breakout/node_modules/sqlite3/node_modules/node-gyp/gyp/tools/pretty_gyp.py +0 -156
  109. wsba_hockey/evidence/weakside-breakout/node_modules/sqlite3/node_modules/node-gyp/gyp/tools/pretty_sln.py +0 -181
  110. wsba_hockey/evidence/weakside-breakout/node_modules/sqlite3/node_modules/node-gyp/gyp/tools/pretty_vcproj.py +0 -339
  111. wsba_hockey/evidence/weakside-breakout/node_modules/sqlite3/node_modules/node-gyp/test/fixtures/test-charmap.py +0 -31
  112. wsba_hockey/evidence/weakside-breakout/node_modules/sqlite3/node_modules/node-gyp/update-gyp.py +0 -46
  113. wsba_hockey/evidence/weakside-breakout/wsba_nhl_apps/wsba_nhl_apps/duos/app.py +0 -210
  114. wsba_hockey/evidence/weakside-breakout/wsba_nhl_apps/wsba_nhl_apps/duos/calc.py +0 -163
  115. wsba_hockey/evidence/weakside-breakout/wsba_nhl_apps/wsba_nhl_apps/game_stats/app.py +0 -401
  116. wsba_hockey/evidence/weakside-breakout/wsba_nhl_apps/wsba_nhl_apps/game_stats/name_fix.py +0 -47
  117. wsba_hockey/evidence/weakside-breakout/wsba_nhl_apps/wsba_nhl_apps/goalie/app.py +0 -101
  118. wsba_hockey/evidence/weakside-breakout/wsba_nhl_apps/wsba_nhl_apps/goalie/plot.py +0 -71
  119. wsba_hockey/evidence/weakside-breakout/wsba_nhl_apps/wsba_nhl_apps/goalie/rink_plot.py +0 -245
  120. wsba_hockey/evidence/weakside-breakout/wsba_nhl_apps/wsba_nhl_apps/heatmaps/app.py +0 -108
  121. wsba_hockey/evidence/weakside-breakout/wsba_nhl_apps/wsba_nhl_apps/heatmaps/plot.py +0 -95
  122. wsba_hockey/evidence/weakside-breakout/wsba_nhl_apps/wsba_nhl_apps/heatmaps/rink_plot.py +0 -245
  123. wsba_hockey/evidence/weakside-breakout/wsba_nhl_apps/wsba_nhl_apps/line-combos/app.py +0 -245
  124. wsba_hockey/evidence/weakside-breakout/wsba_nhl_apps/wsba_nhl_apps/line-combos/plot.py +0 -275
  125. wsba_hockey/evidence/weakside-breakout/wsba_nhl_apps/wsba_nhl_apps/line-combos/rink_plot.py +0 -245
  126. wsba_hockey/evidence/weakside-breakout/wsba_nhl_apps/wsba_nhl_apps/matchups/app.py +0 -145
  127. wsba_hockey/evidence/weakside-breakout/wsba_nhl_apps/wsba_nhl_apps/matchups/plot.py +0 -79
  128. wsba_hockey/evidence/weakside-breakout/wsba_nhl_apps/wsba_nhl_apps/matchups/rink_plot.py +0 -245
  129. wsba_hockey/evidence/weakside-breakout/wsba_nhl_apps/wsba_nhl_apps/pbp/app.py +0 -406
  130. wsba_hockey/evidence/weakside-breakout/wsba_nhl_apps/wsba_nhl_apps/pbp/plot.py +0 -79
  131. wsba_hockey/evidence/weakside-breakout/wsba_nhl_apps/wsba_nhl_apps/pbp/rink_plot.py +0 -245
  132. wsba_hockey/evidence/weakside-breakout/wsba_nhl_apps/wsba_nhl_apps/skater/app.py +0 -110
  133. wsba_hockey/evidence/weakside-breakout/wsba_nhl_apps/wsba_nhl_apps/skater/plot.py +0 -59
  134. wsba_hockey/evidence/weakside-breakout/wsba_nhl_apps/wsba_nhl_apps/skater/rink_plot.py +0 -245
  135. wsba_hockey/evidence/weakside-breakout/wsba_nhl_apps/wsba_nhl_apps/team_heatmaps/app.py +0 -103
  136. wsba_hockey/evidence/weakside-breakout/wsba_nhl_apps/wsba_nhl_apps/team_heatmaps/plot.py +0 -95
  137. wsba_hockey/evidence/weakside-breakout/wsba_nhl_apps/wsba_nhl_apps/team_heatmaps/rink_plot.py +0 -245
  138. wsba_hockey/flask/app.py +0 -77
  139. wsba_hockey/tools/utils/config.py +0 -14
  140. wsba_hockey/tools/utils/save_pages.py +0 -133
  141. wsba_hockey/workspace.py +0 -31
  142. wsba_hockey-1.1.8.dist-info/RECORD +0 -148
  143. {wsba_hockey-1.1.8.dist-info → wsba_hockey-1.2.0.dist-info}/WHEEL +0 -0
  144. {wsba_hockey-1.1.8.dist-info → wsba_hockey-1.2.0.dist-info}/licenses/LICENSE +0 -0
  145. {wsba_hockey-1.1.8.dist-info → wsba_hockey-1.2.0.dist-info}/top_level.txt +0 -0
wsba_hockey/__init__.py CHANGED
@@ -1 +1,22 @@
1
- from wsba_hockey.wsba_main import nhl_scrape_game,nhl_scrape_schedule,nhl_scrape_season,nhl_scrape_seasons_info,nhl_scrape_standings,nhl_scrape_roster,nhl_scrape_draft_rankings,nhl_scrape_prospects,nhl_calculate_stats,nhl_apply_xG,nhl_plot_skaters_shots,nhl_plot_games,repo_load_rosters,repo_load_schedule,repo_load_teaminfo,repo_load_pbp,repo_load_seasons
1
+ from wsba_hockey.wsba_main import (
2
+ nhl_scrape_game,
3
+ nhl_scrape_schedule,
4
+ nhl_scrape_season,
5
+ nhl_scrape_seasons_info,
6
+ nhl_scrape_standings,
7
+ nhl_scrape_roster,
8
+ nhl_scrape_draft_rankings,
9
+ nhl_scrape_prospects,
10
+ nhl_scrape_game_info,
11
+ nhl_calculate_stats,
12
+ nhl_apply_xG,
13
+ nhl_plot_skaters_shots,
14
+ nhl_plot_games,
15
+ repo_load_rosters,
16
+ repo_load_schedule,
17
+ repo_load_teaminfo,
18
+ repo_load_pbp,
19
+ repo_load_seasons
20
+ )
21
+
22
+ from wsba_hockey.wsba_main import NHL_Database
@@ -90,22 +90,26 @@ def get_game_roster(json):
90
90
  def get_game_coaches(game_id):
91
91
  #Given game info, return head coaches for away and home team
92
92
 
93
- #Retreive data
94
- json = rs.get(f'https://api-web.nhle.com/v1/gamecenter/{game_id}/right-rail').json()
95
- data = json['gameInfo']
96
-
97
- #Add coaches
93
+ #Retrieve data (or try to)
98
94
  try:
99
- away = data['awayTeam']['headCoach']['default'].upper()
100
- home = data['homeTeam']['headCoach']['default'].upper()
101
-
102
- coaches = {'away':away,
103
- 'home':home}
104
- except KeyError:
105
- return {}
95
+ json = rs.get(f'https://api-web.nhle.com/v1/gamecenter/{game_id}/right-rail').json()
96
+ data = json['gameInfo']
106
97
 
107
- #Return: dict with coaches
108
- return coaches
98
+ #Add coaches
99
+ try:
100
+ away = data['awayTeam']['headCoach']['default'].upper()
101
+ home = data['homeTeam']['headCoach']['default'].upper()
102
+
103
+ coaches = {'away':away,
104
+ 'home':home}
105
+ except KeyError:
106
+ return {}
107
+
108
+ #Return: dict with coaches
109
+ return coaches
110
+ except rs.JSONDecodeError:
111
+ #Right-rail content is missing for some playoff games in 2019-20
112
+ return {}
109
113
 
110
114
  def get_game_info(game_id):
111
115
  #Given game_id, return game information
@@ -247,17 +251,20 @@ async def parse_json(info):
247
251
  # x, y - Raw coordinates from JSON pbp
248
252
  # x_adj, y_adj - Adjusted coordinates configuring the away offensive zone to the left and the home offensive zone to the right
249
253
  #Some games (mostly preseason and all star games) do not include coordinates.
250
-
251
- try:
252
- events = adjust_coords(events)
253
-
254
- except KeyError:
255
- print(f"No coordinates found for game {info['game_id'][0]}...")
256
-
257
- events['x_adj'] = np.nan
258
- events['y_adj'] = np.nan
259
- events['event_distance'] = np.nan
260
- events['event_angle'] = np.nan
254
+ if info['season'] in [20052006, 20062007, 20072008, 20082009, 20092010]:
255
+ #If the json is used as a supplement for the ESPN pbp data then remove unnecessary columns
256
+ events = events.drop(columns=['x','y','event_team_venue','period_seconds_elapsed','game_id',
257
+ 'period_time_elapsed', 'shot_type', 'zone_code', 'event_player_1_id', 'event_player_2_id', 'event_player_3_id'],
258
+ errors='ignore')
259
+ else:
260
+ try:
261
+ events = adjust_coords(events)
262
+ except KeyError:
263
+ print(f"No coordinates found for game {info['game_id'][0]}...")
264
+ events['x_adj'] = np.nan
265
+ events['y_adj'] = np.nan
266
+ events['event_distance'] = np.nan
267
+ events['event_angle'] = np.nan
261
268
 
262
269
  #Period time adjustments (only 'seconds_elapsed' is included in the resulting data)
263
270
  events['period_seconds_elapsed'] = events['period_time_elasped'].apply(convert_to_seconds)
@@ -509,26 +516,26 @@ async def parse_html(info):
509
516
 
510
517
  data = pd.concat(event_log)
511
518
  data['event_type'] = data['event_type'].replace({
512
- "PGSTR": "pre-game-start",
513
- "PGEND": "pre-game-end",
514
- 'GSTR':"game-start",
515
- "ANTHEM":"anthem",
516
- "PSTR":"period-start",
517
- "FAC":"faceoff",
518
- "SHOT":"shot-on-goal",
519
- "BLOCK":"blocked-shot",
520
- "STOP":"stoppage",
521
- "MISS":"missed-shot",
522
- "HIT":"hit",
523
- "GOAL":"goal",
524
- "GIVE":"giveaway",
525
- "TAKE":"takeaway",
526
- "DELPEN":"delayed-penalty",
527
- "PENL":"penalty",
528
- "CHL":"challenge",
529
- "SOC":'shootout-complete',
530
- "PEND":"period-end",
531
- "GEND":"game-end"
519
+ "PGSTR": "pre-game-start",
520
+ "PGEND": "pre-game-end",
521
+ 'GSTR':"game-start",
522
+ "ANTHEM":"anthem",
523
+ "PSTR":"period-start",
524
+ "FAC":"faceoff",
525
+ "SHOT":"shot-on-goal",
526
+ "BLOCK":"blocked-shot",
527
+ "STOP":"stoppage",
528
+ "MISS":"missed-shot",
529
+ "HIT":"hit",
530
+ "GOAL":"goal",
531
+ "GIVE":"giveaway",
532
+ "TAKE":"takeaway",
533
+ "DELPEN":"delayed-penalty",
534
+ "PENL":"penalty",
535
+ "CHL":"challenge",
536
+ "SOC":'shootout-complete',
537
+ "PEND":"period-end",
538
+ "GEND":"game-end"
532
539
  })
533
540
 
534
541
  #Return: parsed HTML pbp
@@ -563,141 +570,78 @@ def espn_game_id(date,away,home):
563
570
  return game_id
564
571
 
565
572
  async def parse_espn(date,away,home):
566
- #Given a date formatted as YYYY-MM-DD and teams, return game events
573
+ #Given a date formatted as YYYY-MM-DD and teams, return game events from ESPN
567
574
  game_id = espn_game_id(date,away,home)
568
- url = f'https://www.espn.com/nhl/playbyplay/_/gameId/{game_id}'
569
-
570
- #Code modified from Patrick Bacon
571
-
572
- #Retreive game events as json
573
- page = rs.get(url, headers={'User-Agent': 'Mozilla/5.0'}, timeout = 500)
574
- soup = BeautifulSoup(page.content.decode('ISO-8859-1'), 'lxml', multi_valued_attributes = None)
575
- json = json_lib.loads(str(soup).split('"playGrps":')[1].split(',"tms"')[0])
576
-
577
- #DataFrame of time-related info for events
578
- clock_df = pd.DataFrame()
579
-
580
- for period in range(0, len(json)):
581
- clock_df = clock_df._append(pd.DataFrame(json[period]))
582
-
583
- clock_df = clock_df[~pd.isna(clock_df.clock)]
584
-
585
- # Needed to add .split(',"st":3')[0] for playoffs
586
-
587
- #DataFrame of coordinates for events
588
- coords_df = pd.DataFrame(json_lib.loads(str(soup).split('plays":')[1].split(',"st":1')[0].split(',"st":2')[0].split(',"st":3')[0]))
589
-
590
- clock_df = clock_df.assign(
591
- clock = clock_df.clock.apply(lambda x: x['displayValue'])
592
- )
593
-
594
- coords_df = coords_df.assign(
595
- coords_x = coords_df[~pd.isna(coords_df.coordinate)].coordinate.apply(lambda x: x['x']).astype(int),
596
- coords_y = coords_df[~pd.isna(coords_df.coordinate)].coordinate.apply(lambda y: y['y']).astype(int),
597
- )
598
-
599
- #Combine
600
- espn_events = coords_df.merge(clock_df.loc[:, ['id', 'clock']])
601
-
602
- espn_events = espn_events.assign(
603
- period = espn_events['period'].apply(lambda x: x['number']),
604
- minutes = espn_events['clock'].str.split(':').apply(lambda x: x[0]).astype(int),
605
- seconds = espn_events['clock'].str.split(':').apply(lambda x: x[1]).astype(int),
606
- event_type = espn_events['type'].apply(lambda x: x['txt'])
607
- )
608
-
609
- espn_events = espn_events.assign(coords_x = np.where((pd.isna(espn_events.coords_x)) & (pd.isna(espn_events.coords_y)) &
610
- (espn_events.event_type=='Face Off'), 0, espn_events.coords_x
611
- ),
612
- coords_y = np.where((pd.isna(espn_events.coords_x)) & (pd.isna(espn_events.coords_y)) &
613
- (espn_events.event_type=='Face Off'), 0, espn_events.coords_y))
614
-
615
- espn_events = espn_events[(~pd.isna(espn_events.coords_x)) & (~pd.isna(espn_events.coords_y))]
616
-
617
- espn_events = espn_events.assign(
618
- coords_x = espn_events.coords_x.astype(int),
619
- coords_y = espn_events.coords_y.astype(int)
620
- )
621
575
 
622
- #Rename events
623
- #The turnover event includes just one player in the event information, meaning takeaways will have no coordinates for play-by-plays created by ESPN scraping
624
- espn_events['event_type'] = espn_events['event_type'].replace({
625
- "Face Off":'faceoff',
626
- "Hit":'hit',
627
- "Shot":'shot-on-goal',
628
- "Missed":'missed-shot',
629
- "Blocked":'blocked-shot',
630
- "Goal":'goal',
631
- "Delayed Penalty":'delayed-penalty',
632
- "Penalty":'penalty',
576
+ #Hidden ESPN API endpoint (akin to the gamecenter/{game_id}/play-by-play NHL endpoint)
577
+ url = f'https://site.api.espn.com/apis/site/v2/sports/hockey/nhl/summary?event={game_id}'
578
+ data = rs.get(url).json()
579
+ teams = data['boxscore']['teams']
580
+
581
+ #Retrieve plays
582
+ espn_events = pd.json_normalize(data['plays']).rename(columns={
583
+ 'period.number':'period',
584
+ 'clock.displayValue':'period_time_elapsed',
585
+ 'coordinate.x':'x',
586
+ 'coordinate.y':'y',
587
+ 'type.text':'event_type',
633
588
  })
589
+
590
+ #Some games are missing plays on ESPN, for some reason
634
591
 
635
- #Period time adjustments (only 'seconds_elapsed' is included in the resulting data)
636
- espn_events['period_time_simple'] = espn_events['clock'].str.replace(":","",regex=True)
637
- espn_events['period_seconds_elapsed'] = np.where(espn_events['period_time_simple'].str.len()==3,
638
- ((espn_events['period_time_simple'].str[0].astype(int)*60)+espn_events['period_time_simple'].str[-2:].astype(int)),
639
- ((espn_events['period_time_simple'].str[0:2].astype(int)*60)+espn_events['period_time_simple'].str[-2:].astype(int)))
640
- espn_events['seconds_elapsed'] = ((espn_events['period']-1)*1200)+espn_events['period_seconds_elapsed']
641
-
642
- espn_events = espn_events.rename(columns = {'text':'description'})
643
-
644
- #Add event team
645
- espn_events['event_team_abbr'] = espn_events['homeAway'].replace({
646
- "away":away,
647
- "home":home
648
- })
592
+ if espn_events.empty:
593
+ print(f"No coordinates found for game ...")
594
+ return pd.DataFrame(columns=['period','seconds_elapsed','event_type','event_team_abbr'])
595
+ else:
596
+ #Retrieve event team venue with team data (maintain the team abbreviation fill-in at the bottom)
597
+ espn_events['event_team_venue'] = espn_events['team.id'].replace({
598
+ teams[0]['team']['id']: teams[0]['homeAway'],
599
+ teams[1]['team']['id']: teams[1]['homeAway']
600
+ })
601
+
602
+ #Rename events
603
+ #The turnover event includes just one player in the event information, meaning giveaways and takeaways will have no coordinates for play-by-plays created by ESPN scraping
604
+ espn_events['event_type'] = espn_events['event_type'].replace({
605
+ "Face Off":'faceoff',
606
+ "Hit":'hit',
607
+ "Shot":'shot-on-goal',
608
+ "Missed":'missed-shot',
609
+ "Blocked":'blocked-shot',
610
+ "Goal":'goal',
611
+ "Delayed Penalty":'delayed-penalty',
612
+ "Penalty":'penalty'
613
+ })
614
+
615
+ #Period time adjustments (only 'seconds_elapsed' is included in the resulting data)
616
+ espn_events['period_time_elapsed'] = espn_events['period_time_elapsed'].fillna('0:00')
617
+ espn_events['period_seconds_elapsed'] = espn_events['period_time_elapsed'].apply(convert_to_seconds)
618
+ espn_events['seconds_elapsed'] = ((espn_events['period']-1)*1200)+espn_events['period_seconds_elapsed']
619
+
620
+ #Add event team data
621
+ espn_events['event_team_abbr'] = espn_events['event_team_venue'].replace({
622
+ "away":away,
623
+ "home":home
624
+ })
625
+
626
+ #Add temporary game_id for coordinate adjustment
627
+ espn_events['game_id'] = game_id
628
+
629
+ #Coordinate adjustments:
630
+ # x, y - Raw coordinates from JSON pbp
631
+ # x_adj, y_adj - Adjusted coordinates configuring the away offensive zone to the left and the home offensive zone to the right
632
+ #Some games (mostly preseason and all star games) do not include coordinates.
633
+ try:
634
+ espn_events = adjust_coords(espn_events)
635
+ except KeyError:
636
+ print(f"No coordinates found for game ...")
637
+
638
+ espn_events['x_adj'] = np.nan
639
+ espn_events['y_adj'] = np.nan
640
+ espn_events['event_distance'] = np.nan
641
+ espn_events['event_angle'] = np.nan
649
642
 
650
- #Some games (mostly preseason and all star games) do not include coordinates.
651
- try:
652
- espn_events['x_fixed'] = abs(espn_events['coords_x'])
653
- espn_events['y_fixed'] = np.where(espn_events['coords_x']<0,-espn_events['coords_y'],espn_events['coords_y'])
654
- espn_events['x_adj'] = np.where(espn_events['homeAway']=="home",espn_events['x_fixed'],-espn_events['x_fixed'])
655
- espn_events['y_adj'] = np.where(espn_events['homeAway']=="home",espn_events['y_fixed'],-espn_events['y_fixed'])
656
- espn_events['event_distance'] = np.sqrt(((89 - espn_events['x_fixed'])**2) + (espn_events['y_fixed']**2))
657
- espn_events['event_angle'] = np.degrees(np.arctan2(abs(espn_events['y_fixed']), abs(89 - espn_events['x_fixed'])))
658
- except TypeError:
659
- print(f"No coordinates found for ESPN game...")
660
-
661
- espn_events['x_fixed'] = np.nan
662
- espn_events['y_fixed'] = np.nan
663
- espn_events['x_adj'] = np.nan
664
- espn_events['y_adj'] = np.nan
665
- espn_events['event_distance'] = np.nan
666
- espn_events['event_angle'] = np.nan
667
-
668
- #Assign score and fenwick for each event
669
- fenwick_events = ['missed-shot','shot-on-goal','goal']
670
- ag = 0
671
- ags = []
672
- hg = 0
673
- hgs = []
674
-
675
- af = 0
676
- afs = []
677
- hf = 0
678
- hfs = []
679
- for event,team in zip(list(espn_events['event_type']),list(espn_events['homeAway'])):
680
- if event in fenwick_events:
681
- if team == "home":
682
- hf += 1
683
- if event == 'goal':
684
- hg += 1
685
- else:
686
- af += 1
687
- if event == 'goal':
688
- ag += 1
689
-
690
- ags.append(ag)
691
- hgs.append(hg)
692
- afs.append(af)
693
- hfs.append(hf)
694
-
695
- espn_events['away_score'] = ags
696
- espn_events['home_score'] = hgs
697
- espn_events['away_fenwick'] = afs
698
- espn_events['home_fenwick'] = hfs
699
- #Return: play-by-play events in supplied game from ESPN
700
- return espn_events
643
+ #Return: play-by-play events in supplied game from ESPN
644
+ return espn_events
701
645
 
702
646
  def assign_target(data):
703
647
  #Assign target number to plays to assist with merging
@@ -712,26 +656,48 @@ def assign_target(data):
712
656
  #Revert sort and return dataframe
713
657
  return data.reset_index()
714
658
 
659
+ async def no_data():
660
+ #Placeholder coroutine that does nothing, so an espn_pbp task can still be created and gathered when ESPN data is not needed
661
+ pass
662
+
715
663
  async def combine_pbp(info,sources):
716
664
  #Given game info, return complete play-by-play data for provided game
717
665
 
718
666
  #Create tasks
719
667
  html_task = asyncio.create_task(parse_html(info))
720
668
  if info['season'] in [20052006, 20062007, 20072008, 20082009, 20092010]:
721
- json_task = asyncio.create_task(parse_espn(str(info['game_date']),info['away_team_abbr'],info['home_team_abbr']))
669
+ espn_task = asyncio.create_task(parse_espn(str(info['game_date']),info['away_team_abbr'],info['home_team_abbr']))
722
670
  json_type = 'espn'
723
671
  else:
724
- json_task = asyncio.create_task(parse_json(info))
672
+ espn_task = asyncio.create_task(no_data())
725
673
  json_type = 'nhl'
726
674
 
727
- html_pbp, json_pbp = await asyncio.gather(html_task, json_task)
675
+ json_task = asyncio.create_task(parse_json(info))
676
+
677
+ html_pbp, json_pbp, espn_pbp = await asyncio.gather(html_task, json_task, espn_task)
728
678
 
729
679
  #Route data combining - json if season is after 2009-2010:
730
680
  if json_type == 'espn':
731
681
  #ESPN x HTML
732
- espn_pbp = json_pbp.rename(columns={'coords_x':'x',"coords_y":'y'}).sort_values(['period','seconds_elapsed']).reset_index()
682
+ espn_pbp = espn_pbp.sort_values(['period','seconds_elapsed']).reset_index()
733
683
  merge_col = ['period','seconds_elapsed','event_type','event_team_abbr']
684
+
685
+ #Add additional information to espn_pbp with NHL json data
686
+ espn_pbp = pd.merge(espn_pbp,json_pbp,how='left')
734
687
 
688
+ if sources:
689
+ dirs_html = f'sources/{info['season']}/HTML/'
690
+ dirs_json = f'sources/{info['season']}/JSON/'
691
+
692
+ if not os.path.exists(dirs_html):
693
+ os.makedirs(dirs_html)
694
+ if not os.path.exists(dirs_json):
695
+ os.makedirs(dirs_json)
696
+
697
+ html_pbp.to_csv(f'{dirs_html}{info['game_id']}_HTML.csv',index=False)
698
+ espn_pbp.to_csv(f'{dirs_json}{info['game_id']}_JSON.csv',index=False)
699
+
700
+ print(f' merging on columns...',end="")
735
701
  #Merge pbp
736
702
  df = pd.merge(html_pbp,espn_pbp,how='left',on=merge_col)
737
703
 
@@ -832,7 +798,7 @@ def parse_shifts_json(info):
832
798
  def analyze_shifts(shift, id, name, pos, team):
833
799
  #Collects teams in given shifts html (parsed by Beautiful Soup)
834
800
  #Modified version of Harry Shomer's analyze_shifts function in the hockey_scraper package
835
- shifts = dict()
801
+ shifts = {}
836
802
 
837
803
  shifts['player_name'] = name.upper()
838
804
  shifts['player_id'] = id
@@ -869,28 +835,38 @@ def parse_shifts_html(info,home):
869
835
  td, teams = get_soup(doc)
870
836
 
871
837
  team = teams[0]
872
- players = dict()
838
+ players = {}
873
839
 
874
840
  # Iterates through each player shifts table with the following data:
875
841
  # Shift #, Period, Start, End, and Duration.
876
842
  for t in td:
877
843
  t = t.get_text()
878
- if ',' in t: # If a comma exists it is a player
844
+ if ',' in t and re.match(r'\d+', t): # If a comma exists and the text starts with a number, it is a player
879
845
  name = t
880
846
 
881
847
  name = name.split(',')
882
848
  number = int(name[0][:2].strip())
883
- id = rosters[str(number)][4]
884
- players[id] = dict()
849
+ #In very rare cases a player listed will be among the scratches for the same game.
850
+ #Keeping these would more likely than not be a misattribution
851
+ try:
852
+ id = rosters[str(number)][4]
853
+ players[id] = {}
885
854
 
886
- #HTML shift functions assess one team at a time, which simplifies the lookup process with number to name and id
887
-
888
- players[id]['name'] = rosters[str(number)][2]
889
- players[id]['pos'] = rosters[str(number)][1]
855
+ #HTML shift functions assess one team at a time, which simplifies the lookup process with number to name and id
856
+
857
+ players[id]['name'] = rosters[str(number)][2]
858
+ players[id]['pos'] = rosters[str(number)][1]
890
859
 
891
- players[id]['shifts'] = []
860
+ players[id]['shifts'] = []
861
+ except KeyError:
862
+ continue
892
863
  else:
893
- players[id]['shifts'].extend([t])
864
+ #If id somehow has not been assigned at any point before this is run, then just skip
865
+ try:
866
+ #Pushes shifts to current player
867
+ players[id]['shifts'].extend([t])
868
+ except UnboundLocalError:
869
+ continue
894
870
 
895
871
  for key in players.keys():
896
872
  # Create lists of shifts-table columns for analysis
@@ -1 +0,0 @@
1
- ### CODE IN THIS DIRECTORY ORIGINALLY (FULLY OR PARITALLY) WRITTEN BY HARRY SHOMER IN THE "hockey_scraper" PACKAGE