wsba-hockey 1.0.2__py3-none-any.whl → 1.0.4__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (130) hide show
  1. wsba_hockey/data_pipelines.py +183 -0
  2. wsba_hockey/evidence/weakside-breakout/node_modules/duckdb/vendor.py +146 -0
  3. wsba_hockey/evidence/weakside-breakout/node_modules/flatted/python/flatted.py +149 -0
  4. wsba_hockey/evidence/weakside-breakout/node_modules/flatted/python/test.py +63 -0
  5. wsba_hockey/evidence/weakside-breakout/node_modules/node-gyp/gyp/gyp_main.py +45 -0
  6. wsba_hockey/evidence/weakside-breakout/node_modules/node-gyp/gyp/pylib/gyp/MSVSNew.py +367 -0
  7. wsba_hockey/evidence/weakside-breakout/node_modules/node-gyp/gyp/pylib/gyp/MSVSProject.py +206 -0
  8. wsba_hockey/evidence/weakside-breakout/node_modules/node-gyp/gyp/pylib/gyp/MSVSSettings.py +1270 -0
  9. wsba_hockey/evidence/weakside-breakout/node_modules/node-gyp/gyp/pylib/gyp/MSVSSettings_test.py +1547 -0
  10. wsba_hockey/evidence/weakside-breakout/node_modules/node-gyp/gyp/pylib/gyp/MSVSToolFile.py +59 -0
  11. wsba_hockey/evidence/weakside-breakout/node_modules/node-gyp/gyp/pylib/gyp/MSVSUserFile.py +153 -0
  12. wsba_hockey/evidence/weakside-breakout/node_modules/node-gyp/gyp/pylib/gyp/MSVSUtil.py +271 -0
  13. wsba_hockey/evidence/weakside-breakout/node_modules/node-gyp/gyp/pylib/gyp/MSVSVersion.py +574 -0
  14. wsba_hockey/evidence/weakside-breakout/node_modules/node-gyp/gyp/pylib/gyp/__init__.py +690 -0
  15. wsba_hockey/evidence/weakside-breakout/node_modules/node-gyp/gyp/pylib/gyp/common.py +661 -0
  16. wsba_hockey/evidence/weakside-breakout/node_modules/node-gyp/gyp/pylib/gyp/common_test.py +78 -0
  17. wsba_hockey/evidence/weakside-breakout/node_modules/node-gyp/gyp/pylib/gyp/easy_xml.py +165 -0
  18. wsba_hockey/evidence/weakside-breakout/node_modules/node-gyp/gyp/pylib/gyp/easy_xml_test.py +109 -0
  19. wsba_hockey/evidence/weakside-breakout/node_modules/node-gyp/gyp/pylib/gyp/flock_tool.py +55 -0
  20. wsba_hockey/evidence/weakside-breakout/node_modules/node-gyp/gyp/pylib/gyp/generator/__init__.py +0 -0
  21. wsba_hockey/evidence/weakside-breakout/node_modules/node-gyp/gyp/pylib/gyp/generator/analyzer.py +808 -0
  22. wsba_hockey/evidence/weakside-breakout/node_modules/node-gyp/gyp/pylib/gyp/generator/android.py +1173 -0
  23. wsba_hockey/evidence/weakside-breakout/node_modules/node-gyp/gyp/pylib/gyp/generator/cmake.py +1321 -0
  24. wsba_hockey/evidence/weakside-breakout/node_modules/node-gyp/gyp/pylib/gyp/generator/compile_commands_json.py +120 -0
  25. wsba_hockey/evidence/weakside-breakout/node_modules/node-gyp/gyp/pylib/gyp/generator/dump_dependency_json.py +103 -0
  26. wsba_hockey/evidence/weakside-breakout/node_modules/node-gyp/gyp/pylib/gyp/generator/eclipse.py +464 -0
  27. wsba_hockey/evidence/weakside-breakout/node_modules/node-gyp/gyp/pylib/gyp/generator/gypd.py +89 -0
  28. wsba_hockey/evidence/weakside-breakout/node_modules/node-gyp/gyp/pylib/gyp/generator/gypsh.py +58 -0
  29. wsba_hockey/evidence/weakside-breakout/node_modules/node-gyp/gyp/pylib/gyp/generator/make.py +2714 -0
  30. wsba_hockey/evidence/weakside-breakout/node_modules/node-gyp/gyp/pylib/gyp/generator/msvs.py +3981 -0
  31. wsba_hockey/evidence/weakside-breakout/node_modules/node-gyp/gyp/pylib/gyp/generator/msvs_test.py +44 -0
  32. wsba_hockey/evidence/weakside-breakout/node_modules/node-gyp/gyp/pylib/gyp/generator/ninja.py +2936 -0
  33. wsba_hockey/evidence/weakside-breakout/node_modules/node-gyp/gyp/pylib/gyp/generator/ninja_test.py +55 -0
  34. wsba_hockey/evidence/weakside-breakout/node_modules/node-gyp/gyp/pylib/gyp/generator/xcode.py +1394 -0
  35. wsba_hockey/evidence/weakside-breakout/node_modules/node-gyp/gyp/pylib/gyp/generator/xcode_test.py +25 -0
  36. wsba_hockey/evidence/weakside-breakout/node_modules/node-gyp/gyp/pylib/gyp/input.py +3130 -0
  37. wsba_hockey/evidence/weakside-breakout/node_modules/node-gyp/gyp/pylib/gyp/input_test.py +98 -0
  38. wsba_hockey/evidence/weakside-breakout/node_modules/node-gyp/gyp/pylib/gyp/mac_tool.py +771 -0
  39. wsba_hockey/evidence/weakside-breakout/node_modules/node-gyp/gyp/pylib/gyp/msvs_emulation.py +1271 -0
  40. wsba_hockey/evidence/weakside-breakout/node_modules/node-gyp/gyp/pylib/gyp/ninja_syntax.py +174 -0
  41. wsba_hockey/evidence/weakside-breakout/node_modules/node-gyp/gyp/pylib/gyp/simple_copy.py +61 -0
  42. wsba_hockey/evidence/weakside-breakout/node_modules/node-gyp/gyp/pylib/gyp/win_tool.py +374 -0
  43. wsba_hockey/evidence/weakside-breakout/node_modules/node-gyp/gyp/pylib/gyp/xcode_emulation.py +1939 -0
  44. wsba_hockey/evidence/weakside-breakout/node_modules/node-gyp/gyp/pylib/gyp/xcode_ninja.py +302 -0
  45. wsba_hockey/evidence/weakside-breakout/node_modules/node-gyp/gyp/pylib/gyp/xcodeproj_file.py +3197 -0
  46. wsba_hockey/evidence/weakside-breakout/node_modules/node-gyp/gyp/pylib/gyp/xml_fix.py +65 -0
  47. wsba_hockey/evidence/weakside-breakout/node_modules/node-gyp/gyp/test_gyp.py +261 -0
  48. wsba_hockey/evidence/weakside-breakout/node_modules/node-gyp/gyp/tools/graphviz.py +102 -0
  49. wsba_hockey/evidence/weakside-breakout/node_modules/node-gyp/gyp/tools/pretty_gyp.py +156 -0
  50. wsba_hockey/evidence/weakside-breakout/node_modules/node-gyp/gyp/tools/pretty_sln.py +181 -0
  51. wsba_hockey/evidence/weakside-breakout/node_modules/node-gyp/gyp/tools/pretty_vcproj.py +339 -0
  52. wsba_hockey/evidence/weakside-breakout/node_modules/node-gyp/test/fixtures/test-charmap.py +31 -0
  53. wsba_hockey/evidence/weakside-breakout/node_modules/node-gyp/update-gyp.py +64 -0
  54. wsba_hockey/evidence/weakside-breakout/node_modules/sqlite3/node_modules/node-gyp/gyp/gyp_main.py +45 -0
  55. wsba_hockey/evidence/weakside-breakout/node_modules/sqlite3/node_modules/node-gyp/gyp/pylib/gyp/MSVSNew.py +367 -0
  56. wsba_hockey/evidence/weakside-breakout/node_modules/sqlite3/node_modules/node-gyp/gyp/pylib/gyp/MSVSProject.py +206 -0
  57. wsba_hockey/evidence/weakside-breakout/node_modules/sqlite3/node_modules/node-gyp/gyp/pylib/gyp/MSVSSettings.py +1270 -0
  58. wsba_hockey/evidence/weakside-breakout/node_modules/sqlite3/node_modules/node-gyp/gyp/pylib/gyp/MSVSSettings_test.py +1547 -0
  59. wsba_hockey/evidence/weakside-breakout/node_modules/sqlite3/node_modules/node-gyp/gyp/pylib/gyp/MSVSToolFile.py +59 -0
  60. wsba_hockey/evidence/weakside-breakout/node_modules/sqlite3/node_modules/node-gyp/gyp/pylib/gyp/MSVSUserFile.py +153 -0
  61. wsba_hockey/evidence/weakside-breakout/node_modules/sqlite3/node_modules/node-gyp/gyp/pylib/gyp/MSVSUtil.py +271 -0
  62. wsba_hockey/evidence/weakside-breakout/node_modules/sqlite3/node_modules/node-gyp/gyp/pylib/gyp/MSVSVersion.py +574 -0
  63. wsba_hockey/evidence/weakside-breakout/node_modules/sqlite3/node_modules/node-gyp/gyp/pylib/gyp/__init__.py +666 -0
  64. wsba_hockey/evidence/weakside-breakout/node_modules/sqlite3/node_modules/node-gyp/gyp/pylib/gyp/common.py +654 -0
  65. wsba_hockey/evidence/weakside-breakout/node_modules/sqlite3/node_modules/node-gyp/gyp/pylib/gyp/common_test.py +78 -0
  66. wsba_hockey/evidence/weakside-breakout/node_modules/sqlite3/node_modules/node-gyp/gyp/pylib/gyp/easy_xml.py +165 -0
  67. wsba_hockey/evidence/weakside-breakout/node_modules/sqlite3/node_modules/node-gyp/gyp/pylib/gyp/easy_xml_test.py +109 -0
  68. wsba_hockey/evidence/weakside-breakout/node_modules/sqlite3/node_modules/node-gyp/gyp/pylib/gyp/flock_tool.py +55 -0
  69. wsba_hockey/evidence/weakside-breakout/node_modules/sqlite3/node_modules/node-gyp/gyp/pylib/gyp/generator/__init__.py +0 -0
  70. wsba_hockey/evidence/weakside-breakout/node_modules/sqlite3/node_modules/node-gyp/gyp/pylib/gyp/generator/analyzer.py +808 -0
  71. wsba_hockey/evidence/weakside-breakout/node_modules/sqlite3/node_modules/node-gyp/gyp/pylib/gyp/generator/android.py +1173 -0
  72. wsba_hockey/evidence/weakside-breakout/node_modules/sqlite3/node_modules/node-gyp/gyp/pylib/gyp/generator/cmake.py +1321 -0
  73. wsba_hockey/evidence/weakside-breakout/node_modules/sqlite3/node_modules/node-gyp/gyp/pylib/gyp/generator/compile_commands_json.py +120 -0
  74. wsba_hockey/evidence/weakside-breakout/node_modules/sqlite3/node_modules/node-gyp/gyp/pylib/gyp/generator/dump_dependency_json.py +103 -0
  75. wsba_hockey/evidence/weakside-breakout/node_modules/sqlite3/node_modules/node-gyp/gyp/pylib/gyp/generator/eclipse.py +464 -0
  76. wsba_hockey/evidence/weakside-breakout/node_modules/sqlite3/node_modules/node-gyp/gyp/pylib/gyp/generator/gypd.py +89 -0
  77. wsba_hockey/evidence/weakside-breakout/node_modules/sqlite3/node_modules/node-gyp/gyp/pylib/gyp/generator/gypsh.py +58 -0
  78. wsba_hockey/evidence/weakside-breakout/node_modules/sqlite3/node_modules/node-gyp/gyp/pylib/gyp/generator/make.py +2518 -0
  79. wsba_hockey/evidence/weakside-breakout/node_modules/sqlite3/node_modules/node-gyp/gyp/pylib/gyp/generator/msvs.py +3978 -0
  80. wsba_hockey/evidence/weakside-breakout/node_modules/sqlite3/node_modules/node-gyp/gyp/pylib/gyp/generator/msvs_test.py +44 -0
  81. wsba_hockey/evidence/weakside-breakout/node_modules/sqlite3/node_modules/node-gyp/gyp/pylib/gyp/generator/ninja.py +2936 -0
  82. wsba_hockey/evidence/weakside-breakout/node_modules/sqlite3/node_modules/node-gyp/gyp/pylib/gyp/generator/ninja_test.py +55 -0
  83. wsba_hockey/evidence/weakside-breakout/node_modules/sqlite3/node_modules/node-gyp/gyp/pylib/gyp/generator/xcode.py +1394 -0
  84. wsba_hockey/evidence/weakside-breakout/node_modules/sqlite3/node_modules/node-gyp/gyp/pylib/gyp/generator/xcode_test.py +25 -0
  85. wsba_hockey/evidence/weakside-breakout/node_modules/sqlite3/node_modules/node-gyp/gyp/pylib/gyp/input.py +3137 -0
  86. wsba_hockey/evidence/weakside-breakout/node_modules/sqlite3/node_modules/node-gyp/gyp/pylib/gyp/input_test.py +98 -0
  87. wsba_hockey/evidence/weakside-breakout/node_modules/sqlite3/node_modules/node-gyp/gyp/pylib/gyp/mac_tool.py +771 -0
  88. wsba_hockey/evidence/weakside-breakout/node_modules/sqlite3/node_modules/node-gyp/gyp/pylib/gyp/msvs_emulation.py +1271 -0
  89. wsba_hockey/evidence/weakside-breakout/node_modules/sqlite3/node_modules/node-gyp/gyp/pylib/gyp/ninja_syntax.py +174 -0
  90. wsba_hockey/evidence/weakside-breakout/node_modules/sqlite3/node_modules/node-gyp/gyp/pylib/gyp/simple_copy.py +61 -0
  91. wsba_hockey/evidence/weakside-breakout/node_modules/sqlite3/node_modules/node-gyp/gyp/pylib/gyp/win_tool.py +374 -0
  92. wsba_hockey/evidence/weakside-breakout/node_modules/sqlite3/node_modules/node-gyp/gyp/pylib/gyp/xcode_emulation.py +1939 -0
  93. wsba_hockey/evidence/weakside-breakout/node_modules/sqlite3/node_modules/node-gyp/gyp/pylib/gyp/xcode_ninja.py +302 -0
  94. wsba_hockey/evidence/weakside-breakout/node_modules/sqlite3/node_modules/node-gyp/gyp/pylib/gyp/xcodeproj_file.py +3197 -0
  95. wsba_hockey/evidence/weakside-breakout/node_modules/sqlite3/node_modules/node-gyp/gyp/pylib/gyp/xml_fix.py +65 -0
  96. wsba_hockey/evidence/weakside-breakout/node_modules/sqlite3/node_modules/node-gyp/gyp/setup.py +42 -0
  97. wsba_hockey/evidence/weakside-breakout/node_modules/sqlite3/node_modules/node-gyp/gyp/test_gyp.py +260 -0
  98. wsba_hockey/evidence/weakside-breakout/node_modules/sqlite3/node_modules/node-gyp/gyp/tools/graphviz.py +102 -0
  99. wsba_hockey/evidence/weakside-breakout/node_modules/sqlite3/node_modules/node-gyp/gyp/tools/pretty_gyp.py +156 -0
  100. wsba_hockey/evidence/weakside-breakout/node_modules/sqlite3/node_modules/node-gyp/gyp/tools/pretty_sln.py +181 -0
  101. wsba_hockey/evidence/weakside-breakout/node_modules/sqlite3/node_modules/node-gyp/gyp/tools/pretty_vcproj.py +339 -0
  102. wsba_hockey/evidence/weakside-breakout/node_modules/sqlite3/node_modules/node-gyp/test/fixtures/test-charmap.py +31 -0
  103. wsba_hockey/evidence/weakside-breakout/node_modules/sqlite3/node_modules/node-gyp/update-gyp.py +46 -0
  104. wsba_hockey/evidence/weakside-breakout/wsba_nhl_apps/wsba_nhl_apps/game_stats/app.py +401 -0
  105. wsba_hockey/evidence/weakside-breakout/wsba_nhl_apps/wsba_nhl_apps/game_stats/name_fix.py +47 -0
  106. wsba_hockey/evidence/weakside-breakout/wsba_nhl_apps/wsba_nhl_apps/heatmaps/app.py +108 -0
  107. wsba_hockey/evidence/weakside-breakout/wsba_nhl_apps/wsba_nhl_apps/heatmaps/plot.py +93 -0
  108. wsba_hockey/evidence/weakside-breakout/wsba_nhl_apps/wsba_nhl_apps/heatmaps/rink_plot.py +245 -0
  109. wsba_hockey/evidence/weakside-breakout/wsba_nhl_apps/wsba_nhl_apps/matchups/app.py +145 -0
  110. wsba_hockey/evidence/weakside-breakout/wsba_nhl_apps/wsba_nhl_apps/matchups/plot.py +77 -0
  111. wsba_hockey/evidence/weakside-breakout/wsba_nhl_apps/wsba_nhl_apps/matchups/rink_plot.py +245 -0
  112. wsba_hockey/evidence/weakside-breakout/wsba_nhl_apps/wsba_nhl_apps/pbp/app.py +389 -0
  113. wsba_hockey/evidence/weakside-breakout/wsba_nhl_apps/wsba_nhl_apps/pbp/plot.py +70 -0
  114. wsba_hockey/evidence/weakside-breakout/wsba_nhl_apps/wsba_nhl_apps/pbp/rink_plot.py +245 -0
  115. wsba_hockey/evidence/weakside-breakout/wsba_nhl_apps/wsba_nhl_apps/skater/app.py +110 -0
  116. wsba_hockey/evidence/weakside-breakout/wsba_nhl_apps/wsba_nhl_apps/skater/plot.py +58 -0
  117. wsba_hockey/evidence/weakside-breakout/wsba_nhl_apps/wsba_nhl_apps/skater/rink_plot.py +245 -0
  118. wsba_hockey/tools/agg.py +242 -53
  119. wsba_hockey/tools/plotting.py +15 -20
  120. wsba_hockey/tools/scraping.py +149 -258
  121. wsba_hockey/tools/xg_model.py +370 -298
  122. wsba_hockey/workspace.py +22 -101
  123. wsba_hockey/wsba_main.py +494 -147
  124. {wsba_hockey-1.0.2.dist-info → wsba_hockey-1.0.4.dist-info}/METADATA +2 -2
  125. wsba_hockey-1.0.4.dist-info/RECORD +135 -0
  126. {wsba_hockey-1.0.2.dist-info → wsba_hockey-1.0.4.dist-info}/WHEEL +1 -1
  127. wsba_hockey/stats/calculate_viz/shot_impact.py +0 -2
  128. wsba_hockey-1.0.2.dist-info/RECORD +0 -19
  129. {wsba_hockey-1.0.2.dist-info → wsba_hockey-1.0.4.dist-info}/licenses/LICENSE +0 -0
  130. {wsba_hockey-1.0.2.dist-info → wsba_hockey-1.0.4.dist-info}/top_level.txt +0 -0
wsba_hockey/wsba_main.py CHANGED
@@ -1,13 +1,12 @@
1
1
  import requests as rs
2
2
  import pandas as pd
3
- import numpy as np
4
- from datetime import datetime, timedelta, date
5
3
  import time
6
4
  import random
7
- from .tools.scraping import *
8
- from .tools.xg_model import *
9
- from .tools.agg import *
10
- from .tools.plotting import *
5
+ from datetime import datetime, timedelta, date
6
+ from tools.scraping import *
7
+ from tools.xg_model import *
8
+ from tools.agg import *
9
+ from tools.plotting import *
11
10
 
12
11
  ### WSBA HOCKEY ###
13
12
  ## Provided below are all integral functions in the WSBA Hockey Python package. ##
@@ -59,7 +58,7 @@ convert_team_abbr = {'L.A':'LAK',
59
58
  'T.B':'TBL',
60
59
  'PHX':'ARI'}
61
60
 
62
- per_sixty = ['Fi','xGi','Gi','A1','A2','P1','P','FF','FA','xGF','xGA','GF','GA']
61
+ per_sixty = ['Fi','xGi','Gi','A1','A2','P1','P','OZF','NZF','DZF','FF','FA','xGF','xGA','GF','GA','CF','CA','HF','HA','Give','Take','Penl','Penl2','Penl5','Draw','Block']
63
62
 
64
63
  #Some games in the API are specifically known to cause errors in scraping.
65
64
  #This list is updated as frequently as necessary
@@ -73,15 +72,12 @@ known_probs ={
73
72
  '2009020658':'Missing shifts data for game between New York Islanders and Dallas.',
74
73
  '2009020885':'Missing shifts data for game between Sharks and Blue Jackets.',
75
74
  '2010020124':'Game between Capitals and Hurricanes is sporadically missing player on-ice data',
75
+ '2012020018':'HTML events contain mislabeled events.',
76
76
  '2013020971':'On March 10th, 2014, Stars forward Rich Peverley suffered from a cardiac episode midgame and as a result, the remainder of the game was postponed. \nThe game resumed on April 9th, and the only goal scorer in the game, Blue Jackets forward Nathan Horton, did not appear in the resumed game due to injury. Interestingly, Horton would never play in the NHL again.',
77
77
  '2018021133':'Game between Lightning and Capitals has incorrectly labeled event teams (i.e. WSH TAKEAWAY - #71 CIRELLI (Cirelli is a Tampa Bay skater in this game)).',
78
78
  '2019020876':'Due to the frightening collapse of Blues defensemen Jay Bouwmeester, a game on February 2nd, 2020 between the Ducks and Blues was postponed. \nWhen the game resumed, Ducks defensemen Hampus Lindholm, who assisted on a goal in the inital game, did not play in the resumed match.'
79
79
  }
80
80
 
81
- name_change = {
82
- "":"",
83
- }
84
-
85
81
  shot_types = ['wrist','deflected','tip-in','slap','backhand','snap','wrap-around','poke','bat','cradle','between-legs']
86
82
 
87
83
  new = 2024
@@ -107,14 +103,17 @@ standings_end = {
107
103
  '20242025':'04-17'
108
104
  }
109
105
 
106
+ events = ['faceoff','hit','giveaway','takeaway','blocked-shot','missed-shot','shot-on-goal','goal','penalty']
107
+
110
108
  ## SCRAPE FUNCTIONS ##
111
- def nhl_scrape_game(game_ids,split_shifts = False, remove = ['period-start','period-end','challenge','stoppage'],verbose = False, errors = False):
109
+ def nhl_scrape_game(game_ids,split_shifts = False, remove = ['period-start','period-end','challenge','stoppage','shootout-complete','game-end'],verbose = False, sources = False, errors = False):
112
110
  #Given a set of game_ids (NHL API), return complete play-by-play information as requested
113
111
  # param 'game_ids' - NHL game ids (or list formatted as ['random', num_of_games, start_year, end_year])
114
112
  # param 'split_shifts' - boolean which splits pbp and shift events if true
115
113
  # param 'remove' - list of events to remove from final dataframe
116
114
  # param 'xg' - xG model to apply to pbp for aggregation
117
115
  # param 'verbose' - boolean which adds additional event info if true
116
+ # param 'sources' - boolean which scrapes the html and json sources to a master directory if true
118
117
  # param 'errors' - boolean returning game ids which did not scrape if true
119
118
 
120
119
  pbps = []
@@ -154,6 +153,7 @@ def nhl_scrape_game(game_ids,split_shifts = False, remove = ['period-start','per
154
153
  #Scrape each game
155
154
  #Track Errors
156
155
  error_ids = []
156
+ prog = 0
157
157
  for game_id in game_ids:
158
158
  print("Scraping data from game " + str(game_id) + "...",end="")
159
159
  start = time.perf_counter()
@@ -161,15 +161,25 @@ def nhl_scrape_game(game_ids,split_shifts = False, remove = ['period-start','per
161
161
  try:
162
162
  #Retrieve data
163
163
  info = get_game_info(game_id)
164
- data = combine_data(info)
164
+ data = combine_data(info, sources)
165
165
 
166
166
  #Append data to list
167
167
  pbps.append(data)
168
168
 
169
169
  end = time.perf_counter()
170
170
  secs = end - start
171
- print(f" finished in {secs:.2f} seconds.")
171
+ prog += 1
172
+
173
+ #Export if sources is true
174
+ if sources:
175
+ dirs = f'sources/{info['season']}/'
172
176
 
177
+ if not os.path.exists(dirs):
178
+ os.makedirs(dirs)
179
+
180
+ data.to_csv(f'{dirs}{info['game_id']}.csv',index=False)
181
+
182
+ print(f" finished in {secs:.2f} seconds. {prog}/{len(game_ids)} ({(prog/len(game_ids))*100:.2f}%)")
173
183
  except:
174
184
  #Games such as the all-star game and pre-season games will incur this error
175
185
  #Other games have known problems
@@ -180,7 +190,7 @@ def nhl_scrape_game(game_ids,split_shifts = False, remove = ['period-start','per
180
190
 
181
191
  #Track error
182
192
  error_ids.append(game_id)
183
-
193
+
184
194
  #Add all pbps together
185
195
  if len(pbps) == 0:
186
196
  print("\rNo data returned.")
@@ -256,30 +266,37 @@ def nhl_scrape_schedule(season,start = "09-01", end = "08-01"):
256
266
  #Handles dates which are over a year apart
257
267
  day = 365 + day
258
268
  for i in range(day):
259
- #For each day, call NHL api and retreive id, season, season_type (1,2,3), and gamecenter link
269
+ #For each day, call NHL api and retrieve info on all games on the selected date
260
270
  inc = start+timedelta(days=i)
261
271
  print("Scraping games on " + str(inc)[:10]+"...")
262
272
 
263
273
  get = rs.get(api+str(inc)[:10]).json()
264
- gameWeek = list(pd.json_normalize(get['gameWeek'])['games'])[0]
265
-
266
- for i in range(0,len(gameWeek)):
267
- game.append(pd.DataFrame({
268
- "id": [gameWeek[i]['id']],
269
- "season": [gameWeek[i]['season']],
270
- "season_type":[gameWeek[i]['gameType']],
271
- "away_team_abbr":[gameWeek[i]['awayTeam']['abbrev']],
272
- "home_team_abbr":[gameWeek[i]['homeTeam']['abbrev']],
273
- "gamecenter_link":[gameWeek[i]['gameCenterLink']]
274
- }))
275
-
274
+ gameWeek = pd.json_normalize(list(pd.json_normalize(get['gameWeek'])['games'])[0])
275
+
276
+ #If no games are listed for this date, append the empty frame unchanged
277
+ if gameWeek.empty:
278
+ game.append(gameWeek)
279
+ else:
280
+ gameWeek['date'] = get['gameWeek'][0]['date']
281
+
282
+ gameWeek['season_type'] = gameWeek['gameType']
283
+ gameWeek['away_team_abbr'] = gameWeek['awayTeam.abbrev']
284
+ gameWeek['home_team_abbr'] = gameWeek['homeTeam.abbrev']
285
+ gameWeek['game_title'] = gameWeek['away_team_abbr'] + " @ " + gameWeek['home_team_abbr'] + " - " + gameWeek['date']
286
+ gameWeek['estStartTime'] = pd.to_datetime(gameWeek['startTimeUTC']).dt.tz_convert('US/Eastern').dt.strftime("%I:%M %p")
287
+
288
+ front_col = ['id','season','date','season_type','game_title','away_team_abbr','home_team_abbr','estStartTime']
289
+ gameWeek = gameWeek[front_col+[col for col in gameWeek.columns.to_list() if col not in front_col]]
290
+
291
+ game.append(gameWeek)
292
+
276
293
  #Concatenate all games
277
294
  df = pd.concat(game)
278
295
 
279
296
  #Return: specified schedule data
280
297
  return df
281
298
 
282
- def nhl_scrape_season(season,split_shifts = False, season_types = [2,3], remove = ['period-start','period-end','game-end','challenge','stoppage'], start = "09-01", end = "08-01", local=False, local_path = "schedule/schedule.csv", verbose = False, errors = False):
299
+ def nhl_scrape_season(season,split_shifts = False, season_types = [2,3], remove = ['period-start','period-end','game-end','challenge','stoppage'], start = "09-01", end = "08-01", local=False, local_path = "schedule/schedule.csv", verbose = False, sources = False, errors = False):
283
300
  #Given season, scrape all play-by-play occurring within the season
284
301
  # param 'season' - NHL season to scrape
285
302
  # param 'split_shifts' - boolean which splits pbp and shift events if true
@@ -289,12 +306,21 @@ def nhl_scrape_season(season,split_shifts = False, season_types = [2,3], remove
289
306
  # param 'local' - boolean indicating whether to use local file to scrape game_ids
290
307
  # param 'local_path' - path of local file
291
308
  # param 'verbose' - boolean which adds additional event info if true
309
+ # param 'sources' - boolean which scrapes the html and json sources to a master directory if true
292
310
  # param 'errors' - boolean returning game ids which did not scrape if true
293
311
 
294
312
  #Determine whether to use schedule data in repository or to scrape
295
- if local == True:
313
+ if local:
296
314
  load = pd.read_csv(local_path)
297
- load = load.loc[(load['season'].astype(str)==season)&(load['season_type'].isin(season_types))]
315
+ load['date'] = pd.to_datetime(load['date'])
316
+
317
+ start = f'{(season[0:4] if int(start[0:2])>=9 else season[4:8])}-{int(start[0:2])}-{int(start[3:5])}'
318
+ end = f'{(season[0:4] if int(end[0:2])>=9 else season[4:8])}-{int(end[0:2])}-{int(end[3:5])}'
319
+
320
+ load = load.loc[(load['season'].astype(str)==season)&
321
+ (load['season_type'].isin(season_types))&
322
+ (load['date']>=start)&(load['date']<=end)]
323
+
298
324
  game_ids = list(load['id'].astype(str))
299
325
  else:
300
326
  load = nhl_scrape_schedule(season,start,end)
@@ -310,31 +336,17 @@ def nhl_scrape_season(season,split_shifts = False, season_types = [2,3], remove
310
336
  start = time.perf_counter()
311
337
 
312
338
  #Perform scrape
313
- if split_shifts == True:
314
- data = nhl_scrape_game(game_ids,split_shifts=True,remove=remove,verbose=verbose,errors=errors)
339
+ if split_shifts:
340
+ data = nhl_scrape_game(game_ids,split_shifts=True,remove=remove,verbose=verbose,sources=sources,errors=errors)
315
341
  else:
316
- data = nhl_scrape_game(game_ids,remove=remove,verbose=verbose,errors=errors)
342
+ data = nhl_scrape_game(game_ids,remove=remove,verbose=verbose,sources=sources,errors=errors)
317
343
 
318
344
  end = time.perf_counter()
319
345
  secs = end - start
320
346
 
321
347
  print(f'Finished season scrape in {(secs/60)/60:.2f} hours.')
322
348
  #Return: Complete pbp and shifts data for specified season as well as dataframe of game_ids which failed to return data
323
- if split_shifts == True:
324
- pbp_dict = {'pbp':data['pbp'],
325
- 'shifts':data['shifts']}
326
-
327
- if errors:
328
- pbp_dict.update({'errors':data['errors']})
329
- return pbp_dict
330
- else:
331
- pbp = data
332
- if errors:
333
- pbp_dict = {'pbp':pbp,
334
- 'errors':data['errors']}
335
- return pbp_dict
336
- else:
337
- return pbp
349
+ return data
338
350
 
339
351
  def nhl_scrape_seasons_info(seasons = []):
340
352
  #Returns info related to NHL seasons (by default, all seasons are included)
@@ -449,17 +461,24 @@ def nhl_scrape_team_info(country = False):
449
461
 
450
462
  return data.sort_values(by=(['country3Code','countryCode','iocCode','countryName'] if country else ['fullName','triCode','id']))
451
463
 
452
- def nhl_scrape_player_data(player_id):
464
+ def nhl_scrape_player_data(player_ids):
453
465
  #Given a list of player ids, return player information for each player
454
- api = f'https://api-web.nhle.com/v1/player/{player_id}/landing'
466
+ infos = []
467
+ for player_id in player_ids:
468
+ player_id = int(player_id)
469
+ api = f'https://api-web.nhle.com/v1/player/{player_id}/landing'
470
+
471
+ data = pd.json_normalize(rs.get(api).json())
455
472
 
456
- data = pd.json_normalize(rs.get(api).json())
473
+ #Add name column
474
+ data['fullName'] = (data['firstName.default'] + " " + data['lastName.default']).str.upper()
457
475
 
458
- #Add name column
459
- data['fullName'] = (data['firstName.default'] + " " + data['lastName.default']).str.upper()
476
+ #Append
477
+ infos.append(data)
460
478
 
479
+ df = pd.concat(infos)
461
480
  #Return: player data
462
- return data
481
+ return df
463
482
 
464
483
  def nhl_scrape_draft_rankings(arg = 'now', category = ''):
465
484
  #Given url argument for timeframe and prospect category, return draft rankings
@@ -478,12 +497,26 @@ def nhl_scrape_draft_rankings(arg = 'now', category = ''):
478
497
  #Return: prospect rankings
479
498
  return data
480
499
 
481
- def nhl_shooting_impacts(agg,team=False):
500
+ def nhl_apply_xG(pbp):
501
+ #Given play-by-play data, return this data with xG-related columns
502
+
503
+ #param 'pbp' - play-by-play data
504
+
505
+ print(f'Applying WSBA xG to model with seasons: {pbp['season'].drop_duplicates().to_list()}')
506
+ #Fix player data
507
+ #pbp = fix_players(pbp)
508
+
509
+ #Apply xG model
510
+ pbp = wsba_xG(pbp)
511
+
512
+ return pbp
513
+
514
+ def nhl_shooting_impacts(agg,type):
482
515
  #Given stats table generated from the nhl_calculate_stats function, return table with shot impacts
483
516
  #Only 5v5 is supported as of now
484
517
 
485
518
  #param 'agg' - stats table
486
- #param 'team' - boolean determining if team stats are calculated instead of skater stats
519
+ #param 'type' - type of stats to calculate ('skater', 'goalie', or 'team')
487
520
 
488
521
  #COMPOSITE IMPACT EVALUATIONS:
489
522
 
@@ -509,7 +542,7 @@ def nhl_shooting_impacts(agg,team=False):
509
542
 
510
543
  return rate+qual+fini
511
544
 
512
- if team:
545
+ if type == 'goalie':
513
546
  pos = agg
514
547
  for group in [('OOFF','F'),('ODEF','A')]:
515
548
  #Have to set these columns for compatibility with df.apply
@@ -536,35 +569,172 @@ def nhl_shooting_impacts(agg,team=False):
536
569
  pos[f'{group[0]}-SRI'] = pos['g'] - pos.apply(lambda x: goal_comp(avg_fen,x.xg_fen,x.xg,x.g,avg_fsh),axis=1)
537
570
  pos[f'{group[0]}-SQI'] = pos['g'] - pos.apply(lambda x: goal_comp(x.fenwick,avg_xg_fen,x.xg,x.g,avg_fsh),axis=1)
538
571
  pos[f'{group[0]}-FNI'] = pos['g'] - pos.apply(lambda x: goal_comp(x.fenwick,x.xg_fen,avg_xg,avg_g,avg_fsh),axis=1)
572
+
573
+ #Convert impacts to totals
574
+ #Calculate shot rate, shot quality, and finishing impacts
575
+ pos[f'{group[0]}-SRI-T'] = (pos[f'{group[0]}-SRI']/60)*pos['TOI']
576
+ pos[f'{group[0]}-SQI-T'] = (pos[f'{group[0]}-SQI']/60)*pos['TOI']
577
+ pos[f'{group[0]}-FNI-T'] = (pos[f'{group[0]}-FNI']/60)*pos['TOI']
539
578
 
579
+ #Rank per 60 stats
580
+ for stat in ['FF','FA','xGF','xGA','GF','GA','CF','CA','GSAx']:
581
+ pos[f'{stat}/60-P'] = pos[f'{stat}/60'].rank(pct=True)
582
+
583
+ #Flip percentiles for against stats
584
+ for stat in ['FA','xGA','GA','CA']:
585
+ pos[f'{stat}/60-P'] = 1-pos[f'{stat}/60-P']
586
+
540
587
  #Add extra metrics
541
588
  pos['RushF/60'] = (pos['RushF']/pos['TOI'])*60
542
589
  pos['RushA/60'] = (pos['RushA']/pos['TOI'])*60
543
- pos['Rushes FF'] = pos['RushF/60'].rank(pct=True)
544
- pos['Rushes FA'] = pos['RushA/60'].rank(pct=True)
590
+ pos['RushesFF'] = pos['RushF/60'].rank(pct=True)
591
+ pos['RushesFA'] = 1 - pos['RushA/60'].rank(pct=True)
545
592
  pos['RushFxG/60'] = (pos['RushFxG']/pos['TOI'])*60
546
593
  pos['RushAxG/60'] = (pos['RushAxG']/pos['TOI'])*60
547
- pos['Rushes xGF'] = pos['RushFxG/60'].rank(pct=True)
548
- pos['Rushes xGA'] = pos['RushAxG/60'].rank(pct=True)
594
+ pos['RushesxGF'] = pos['RushFxG/60'].rank(pct=True)
595
+ pos['RushesxGA'] = 1 - pos['RushAxG/60'].rank(pct=True)
549
596
  pos['RushFG/60'] = (pos['RushFG']/pos['TOI'])*60
550
597
  pos['RushAG/60'] = (pos['RushAG']/pos['TOI'])*60
551
- pos['Rushes GF'] = pos['RushFG/60'].rank(pct=True)
552
- pos['Rushes GA'] = pos['RushAG/60'].rank(pct=True)
598
+ pos['RushesGF'] = pos['RushFG/60'].rank(pct=True)
599
+ pos['RushesGA'] = 1 - pos['RushAG/60'].rank(pct=True)
553
600
 
554
601
  #Flip against metric percentiles
555
602
  pos['ODEF-SR'] = 1-pos['ODEF-SR']
556
603
  pos['ODEF-SQ'] = 1-pos['ODEF-SQ']
557
604
  pos['ODEF-FN'] = 1-pos['ODEF-FN']
558
605
 
606
+ #Extraneous Values
607
+ pos['EGF'] = pos['OOFF-SRI']+pos['OOFF-SQI']+pos['OOFF-FNI']
608
+ pos['ExGF'] = pos['OOFF-SRI']+pos['OOFF-SQI']
609
+ pos['EGA'] = pos['ODEF-SRI']+pos['ODEF-SQI']+pos['ODEF-FNI']
610
+ pos['ExGA'] = pos['ODEF-SRI']+pos['ODEF-SQI']
611
+
612
+ #...and their percentiles
613
+ pos['EGF-P'] = pos['EGF'].rank(pct=True)
614
+ pos['ExGF-P'] = pos['ExGF'].rank(pct=True)
615
+ pos['EGA-P'] = pos['EGA'].rank(pct=True)
616
+ pos['ExGA-P'] = pos['ExGA'].rank(pct=True)
617
+
618
+ pos['EGA-P'] = 1-pos['EGA']
619
+ pos['ExGA-P'] = 1-pos['ExGA']
620
+
621
+ #...and then their totals
622
+ pos['EGF-T'] = (pos['EGF']/60)*pos['TOI']
623
+ pos['ExGF-T'] = (pos['ExGF']/60)*pos['TOI']
624
+ pos['EGA-T'] = (pos['EGA']/60)*pos['TOI']
625
+ pos['ExGA-T'] = (pos['ExGA']/60)*pos['TOI']
626
+
627
+ #Goal Composites...
628
+ pos['Team-Adjusted-EGI'] = pos['ODEF-FNI']-pos['ExGA']
629
+ pos['GISAx'] = pos['ExGA']-pos['EGA']
630
+ pos['NetGI'] = pos['EGF'] - pos['EGA']
631
+ pos['NetxGI'] = pos['ExGF'] - pos['ExGA']
632
+
633
+ #...and their percentiles
634
+ pos['Team-Adjusted-EGI-P'] = pos['Team-Adjusted-EGI'].rank(pct=True)
635
+ pos['GISAx-P'] = pos['GISAx'].rank(pct=True)
636
+ pos['NetGI-P'] = pos['NetGI'].rank(pct=True)
637
+ pos['NetxGI-P'] = pos['NetxGI'].rank(pct=True)
638
+
639
+ #...and then their totals
640
+ pos['Team-Adjusted-EGI-T'] = (pos['Team-Adjusted-EGI']/60)*pos['TOI']
641
+ pos['GISAx-T'] = (pos['GISAx']/60)*pos['TOI']
642
+ pos['NetGI-T'] = (pos['NetGI']/60)*pos['TOI']
643
+ pos['NetxGI-T'] = (pos['NetxGI']/60)*pos['TOI']
644
+
559
645
  #Return: team stats with shooting impacts
560
- return pos.drop(columns=['fsh','fenwick','xg_fen','xg','g','finishing']).sort_values(['Season','Team'])
646
+ return pos.drop(columns=['fsh','fenwick','xg_fen','xg','g','finishing']).sort_values(['Goalie','Season','Team'])
647
+
648
+ elif type =='team':
649
+ pos = agg
650
+ for group in [('OOFF','F'),('ODEF','A')]:
651
+ #Have to set this columns for compatibility with df.apply
652
+ pos['fsh'] = pos[f'Fsh{group[1]}%']
653
+ pos['fenwick'] = pos[f'F{group[1]}/60']
654
+ pos['xg'] = pos[f'xG{group[1]}/60']
655
+ pos['g'] = pos[f'G{group[1]}/60']
656
+ pos['xg_fen'] = pos[f'xG{group[1]}/F{group[1]}']
657
+ pos['finishing'] = pos[f'G{group[1]}/xG{group[1]}']
658
+
659
+ #Find average for position in frame
660
+ avg_fen = pos['fenwick'].mean()
661
+ avg_xg = pos['xg'].mean()
662
+ avg_g = pos['g'].mean()
663
+ avg_fsh = avg_g/avg_fen
664
+ avg_xg_fen = avg_xg/avg_fen
665
+
666
+ #Calculate composite percentiles
667
+ pos[f'{group[0]}-SR'] = pos['fenwick'].rank(pct=True)
668
+ pos[f'{group[0]}-SQ'] = pos['xg_fen'].rank(pct=True)
669
+ pos[f'{group[0]}-FN'] = pos['finishing'].rank(pct=True)
670
+
671
+ #Calculate shot rate, shot quality, and finishing impacts
672
+ pos[f'{group[0]}-SRI'] = pos['g'] - pos.apply(lambda x: goal_comp(avg_fen,x.xg_fen,x.xg,x.g,avg_fsh),axis=1)
673
+ pos[f'{group[0]}-SQI'] = pos['g'] - pos.apply(lambda x: goal_comp(x.fenwick,avg_xg_fen,x.xg,x.g,avg_fsh),axis=1)
674
+ pos[f'{group[0]}-FNI'] = pos['g'] - pos.apply(lambda x: goal_comp(x.fenwick,x.xg_fen,avg_xg,avg_g,avg_fsh),axis=1)
675
+
676
+ #Convert impacts to totals
677
+ #Calculate shot rate, shot quality, and finishing impacts
678
+ pos[f'{group[0]}-SRI-T'] = (pos[f'{group[0]}-SRI']/60)*pos['TOI']
679
+ pos[f'{group[0]}-SQI-T'] = (pos[f'{group[0]}-SQI']/60)*pos['TOI']
680
+ pos[f'{group[0]}-FNI-T'] = (pos[f'{group[0]}-FNI']/60)*pos['TOI']
681
+
682
+ #Rank per 60 stats
683
+ for stat in per_sixty[10:len(per_sixty)]:
684
+ pos[f'{stat}/60-P'] = pos[f'{stat}/60'].rank(pct=True)
685
+
686
+ #Flip percentiles for against stats
687
+ for stat in ['FA','xGA','GA','CA','HA','Give','Penl','Penl2','Penl5']:
688
+ pos[f'{stat}/60-P'] = 1-pos[f'{stat}/60-P']
689
+
690
+ #Add extra metrics
691
+ pos['RushF/60'] = (pos['RushF']/pos['TOI'])*60
692
+ pos['RushA/60'] = (pos['RushA']/pos['TOI'])*60
693
+ pos['RushesFF'] = pos['RushF/60'].rank(pct=True)
694
+ pos['RushesFA'] = 1 - pos['RushA/60'].rank(pct=True)
695
+ pos['RushFxG/60'] = (pos['RushFxG']/pos['TOI'])*60
696
+ pos['RushAxG/60'] = (pos['RushAxG']/pos['TOI'])*60
697
+ pos['RushesxGF'] = pos['RushFxG/60'].rank(pct=True)
698
+ pos['RushesxGA'] = 1 - pos['RushAxG/60'].rank(pct=True)
699
+ pos['RushFG/60'] = (pos['RushFG']/pos['TOI'])*60
700
+ pos['RushAG/60'] = (pos['RushAG']/pos['TOI'])*60
701
+ pos['RushesGF'] = pos['RushFG/60'].rank(pct=True)
702
+ pos['RushesGA'] = 1 - pos['RushAG/60'].rank(pct=True)
703
+
704
+ #Flip against metric percentiles
705
+ pos['ODEF-SR'] = 1-pos['ODEF-SR']
706
+ pos['ODEF-SQ'] = 1-pos['ODEF-SQ']
707
+ pos['ODEF-FN'] = 1-pos['ODEF-FN']
561
708
 
709
+ pos['EGF'] = pos['OOFF-SRI']+pos['OOFF-SQI']+pos['OOFF-FNI']
710
+ pos['ExGF'] = pos['OOFF-SRI']+pos['OOFF-SQI']
711
+ pos['EGA'] = pos['ODEF-SRI']+pos['ODEF-SQI']+pos['ODEF-FNI']
712
+ pos['ExGA'] = pos['ODEF-SRI']+pos['ODEF-SQI']
713
+
714
+ #...and their percentiles
715
+ pos['EGF-P'] = pos['EGF'].rank(pct=True)
716
+ pos['ExGF-P'] = pos['ExGF'].rank(pct=True)
717
+ pos['EGA-P'] = pos['EGA'].rank(pct=True)
718
+ pos['ExGA-P'] = pos['ExGA'].rank(pct=True)
719
+
720
+ pos['EGA-P'] = 1-pos['EGA']
721
+ pos['ExGA-P'] = 1-pos['ExGA']
722
+
723
+ #...and then their totals
724
+ pos['EGF-T'] = (pos['EGF']/60)*pos['TOI']
725
+ pos['ExGF-T'] = (pos['ExGF']/60)*pos['TOI']
726
+ pos['EGA-T'] = (pos['EGA']/60)*pos['TOI']
727
+ pos['ExGA-T'] = (pos['ExGA']/60)*pos['TOI']
728
+
729
+ #Return: team stats with shooting impacts
730
+ return pos.drop(columns=['fsh','fenwick','xg_fen','xg','g','finishing']).sort_values(['Season','Team'])
562
731
 
563
732
  else:
564
733
  #Remove skaters with less than 150 minutes of TOI then split between forwards and dmen
565
- agg = agg.loc[agg['TOI']>=150]
566
- forwards = agg.loc[agg['Position']!='D']
567
- defensemen = agg.loc[agg['Position']=='D']
734
+ #These are added back in after the fact
735
+ forwards = agg.loc[(agg['Position']!='D')&(agg['TOI']>=150)]
736
+ defensemen = agg.loc[(agg['Position']=='D')&(agg['TOI']>=150)]
737
+ non_players = agg.loc[agg['TOI']<150]
568
738
 
569
739
  #Loop through both positions, all groupings (INDV, OOFF, and ODEF) generating impacts
570
740
  for pos in [forwards,defensemen]:
@@ -594,15 +764,29 @@ def nhl_shooting_impacts(agg,team=False):
594
764
  pos[f'{group[0]}-SQI'] = pos['g'] - pos.apply(lambda x: goal_comp(x.fenwick,avg_xg_fen,x.xg,x.g,avg_fsh),axis=1)
595
765
  pos[f'{group[0]}-FNI'] = pos['g'] - pos.apply(lambda x: goal_comp(x.fenwick,x.xg_fen,avg_xg,avg_g,avg_fsh),axis=1)
596
766
 
767
+ #Convert impacts to totals
768
+ #Calculate shot rate, shot quality, and finishing impacts
769
+ pos[f'{group[0]}-SRI-T'] = (pos[f'{group[0]}-SRI']/60)*pos['TOI']
770
+ pos[f'{group[0]}-SQI-T'] = (pos[f'{group[0]}-SQI']/60)*pos['TOI']
771
+ pos[f'{group[0]}-FNI-T'] = (pos[f'{group[0]}-FNI']/60)*pos['TOI']
772
+
597
773
  #Calculate On-Ice Involvement Percentiles
598
- pos['Fenwick'] = pos['FC%'].rank(pct=True)
599
- pos['xG'] = pos['xGC%'].rank(pct=True)
600
- pos['Goal Factor'] = pos['GI%'].rank(pct=True)
601
- pos['Goal Scoring'] = pos['GC%'].rank(pct=True)
602
- pos['Rush/60'] = (pos['Rush']/pos['TOI'])*60
603
- pos['RushxG/60'] = (pos['Rush xG']/pos['TOI'])*60
604
- pos['Rushes xG'] = pos['RushxG/60'].rank(pct=True)
605
- pos['Rushes FF'] = pos['Rush/60'].rank(pct=True)
774
+ pos['Fi/F'] = pos['FC%'].rank(pct=True)
775
+ pos['xGi/F'] = pos['xGC%'].rank(pct=True)
776
+ pos['Pi/F'] = pos['GI%'].rank(pct=True)
777
+ pos['Gi/F'] = pos['GC%'].rank(pct=True)
778
+ pos['RushFi/60'] = (pos['Rush']/pos['TOI'])*60
779
+ pos['RushxGi/60'] = (pos['Rush xG']/pos['TOI'])*60
780
+ pos['RushesxGi'] = pos['RushxGi/60'].rank(pct=True)
781
+ pos['RushesFi'] = pos['RushFi/60'].rank(pct=True)
782
+
783
+ #Rank per 60 stats
784
+ for stat in per_sixty:
785
+ pos[f'{stat}/60-P'] = pos[f'{stat}/60'].rank(pct=True)
786
+
787
+ #Flip percentiles for against stats
788
+ for stat in ['FA','xGA','GA','CA','HA','Give','Penl','Penl2','Penl5']:
789
+ pos[f'{stat}/60-P'] = 1-pos[f'{stat}/60-P']
606
790
 
607
791
  #Add positions back together
608
792
  complete = pd.concat([forwards,defensemen])
@@ -613,97 +797,253 @@ def nhl_shooting_impacts(agg,team=False):
613
797
  complete['ODEF-FN'] = 1-complete['ODEF-FN']
614
798
 
615
799
  #Extraneous Values
616
- complete['Extraneous Gi'] = complete['INDV-SRI']+complete['INDV-SQI']+complete['INDV-FNI']
617
- complete['Extraneous xGi'] = complete['INDV-SRI']+complete['INDV-SQI']
618
- complete['Extraneous GF'] = complete['OOFF-SRI']+complete['OOFF-SQI']+complete['OOFF-FNI']
619
- complete['Extraneous xGF'] = complete['OOFF-SRI']+complete['OOFF-SQI']
620
- complete['Extraneous GA'] = complete['ODEF-SRI']+complete['ODEF-SQI']+complete['ODEF-FNI']
621
- complete['Extraneous xGA'] = complete['ODEF-SRI']+complete['ODEF-SQI']
622
-
623
- #Goal Composites
624
- complete['Linemate Extraneous Goals'] = complete['Extraneous GF'] - complete['Extraneous Gi']
625
- complete['Linemate Goal Induction'] = complete['Linemate Extraneous Goals']*complete['AC%']
626
- complete['Composite Goal Impact'] = complete['Extraneous Gi'] + complete['Linemate Goal Induction']
627
- complete['Linemate Rel. Goal Impact'] = complete['Composite Goal Impact'] - (complete['Extraneous GF']-complete['Composite Goal Impact'])
628
- complete['Net Goal Impact'] = complete['Extraneous GF'] - complete['Extraneous GA']
629
- complete['Net xGoal Impact'] = complete['Extraneous xGF'] - complete['Extraneous xGA']
630
-
800
+ complete['EGi'] = complete['INDV-SRI']+complete['INDV-SQI']+complete['INDV-FNI']
801
+ complete['ExGi'] = complete['INDV-SRI']+complete['INDV-SQI']
802
+ complete['EGF'] = complete['OOFF-SRI']+complete['OOFF-SQI']+complete['OOFF-FNI']
803
+ complete['ExGF'] = complete['OOFF-SRI']+complete['OOFF-SQI']
804
+ complete['EGA'] = complete['ODEF-SRI']+complete['ODEF-SQI']+complete['ODEF-FNI']
805
+ complete['ExGA'] = complete['ODEF-SRI']+complete['ODEF-SQI']
806
+
807
+ #...and their percentiles
808
+ complete['EGi-P'] = complete['EGi'].rank(pct=True)
809
+ complete['ExGi-P'] = complete['ExGi'].rank(pct=True)
810
+ complete['EGF-P'] = complete['EGF'].rank(pct=True)
811
+ complete['ExGF-P'] = complete['ExGF'].rank(pct=True)
812
+ complete['EGA-P'] = complete['EGA'].rank(pct=True)
813
+ complete['ExGA-P'] = complete['ExGA'].rank(pct=True)
814
+
815
+ complete['EGA-P'] = 1-complete['EGA']
816
+ complete['ExGA-P'] = 1-complete['ExGA']
817
+
818
+ #...and then their totals
819
+ complete['EGi-T'] = (complete['EGi']/60)*complete['TOI']
820
+ complete['ExGi-T'] = (complete['ExGi']/60)*complete['TOI']
821
+ complete['EGF-T'] = (complete['EGF']/60)*complete['TOI']
822
+ complete['ExGF-T'] = (complete['ExGF']/60)*complete['TOI']
823
+ complete['EGA-T'] = (complete['EGA']/60)*complete['TOI']
824
+ complete['ExGA-T'] = (complete['ExGA']/60)*complete['TOI']
825
+
826
+ #Goal Composites...
827
+ complete['LiEG'] = complete['EGF'] - complete['EGi']
828
+ complete['LiExG'] = complete['ExGF'] - complete['ExGi']
829
+ complete['LiGIn'] = complete['LiEG']*complete['AC%']
830
+ complete['LixGIn'] = complete['LiExG']*complete['AC%']
831
+ complete['ALiGIn'] = complete['LiGIn']-complete['LixGIn']
832
+ complete['CompGI'] = complete['EGi'] + complete['LiGIn']
833
+ complete['LiRelGI'] = complete['CompGI'] - (complete['EGF']-complete['CompGI'])
834
+ complete['NetGI'] = complete['EGF'] - complete['EGA']
835
+ complete['NetxGI'] = complete['ExGF'] - complete['ExGA']
836
+
837
+ #...and their percentiles
838
+ complete['LiEG-P'] = complete['LiEG'].rank(pct=True)
839
+ complete['LiExG-P'] = complete['LiExG'].rank(pct=True)
840
+ complete['LiGIn-P'] = complete['LiGIn'].rank(pct=True)
841
+ complete['LixGIn-P'] = complete['LixGIn'].rank(pct=True)
842
+ complete['ALiGIn-P'] = complete['ALiGIn'].rank(pct=True)
843
+ complete['CompGI-P'] = complete['CompGI'].rank(pct=True)
844
+ complete['LiRelGI-P'] = complete['LiRelGI'].rank(pct=True)
845
+ complete['NetGI-P'] = complete['NetGI'].rank(pct=True)
846
+ complete['NetxGI-P'] = complete['NetxGI'].rank(pct=True)
847
+
848
+ #..and then their totals
849
+ complete['LiEG-T'] = (complete['LiEG']/60)*complete['TOI']
850
+ complete['LiExG-T'] = (complete['LiExG']/60)*complete['TOI']
851
+ complete['LiGIn-T'] = (complete['LiGIn']/60)*complete['TOI']
852
+ complete['LixGIn-T'] = (complete['LixGIn']/60)*complete['TOI']
853
+ complete['ALiGIn-T'] = (complete['ALiGIn']/60)*complete['TOI']
854
+ complete['CompGI-T'] = (complete['CompGI']/60)*complete['TOI']
855
+ complete['LiRelGI-T'] = (complete['LiRelGI']/60)*complete['TOI']
856
+ complete['NetGI-T'] = (complete['NetGI']/60)*complete['TOI']
857
+ complete['NetxGI-T'] = (complete['NetxGI']/60)*complete['TOI']
858
+
859
+ #Add back skaters with less than 150 minutes TOI
860
+ df = pd.concat([complete,non_players]).drop(columns=['fsh','fenwick','xg_fen','xg','g','finishing']).sort_values(['Player','Season','Team','ID'])
631
861
  #Return: skater stats with shooting impacts
632
- return complete.drop(columns=['fsh','fenwick','xg_fen','xg','g','finishing']).sort_values(['Player','Season','Team','ID'])
862
+ return df
633
863
 
634
- def nhl_calculate_stats(pbp,type,season_types,game_strength,roster_path="rosters/nhl_rosters.csv",xg="moneypuck",shot_impact=False):
864
+ def nhl_calculate_stats(pbp,type,season_types,game_strength,split_game=False,roster_path="rosters/nhl_rosters.csv",shot_impact=False):
635
865
  #Given play-by-play, seasonal information, game_strength, rosters, and xG model, return aggregated stats
636
866
  # param 'pbp' - play-by-play dataframe
637
- # param 'type' - type of stats to calculate ('skater', 'goaltender', or 'team')
867
+ # param 'type' - type of stats to calculate ('skater', 'goalie', or 'team')
638
868
  # param 'season' - season or timeframe of events in play-by-play
639
869
  # param 'season_type' - list of season types (preseason, regular season, or playoffs) to include in aggregation
640
870
  # param 'game_strength' - list of game_strengths to include in aggregation
871
+ # param 'split_game' - boolean which if true groups aggregation by game
641
872
  # param 'roster_path' - path to roster file
642
- # param 'xg' - xG model to apply to pbp for aggregation
643
873
  # param 'shot_impact' - boolean determining if the shot impact model will be applied to the dataset
644
874
 
645
- print(f"Calculating statistics for all games in the provided play-by-play data...\nSeasons included: {pbp['season'].drop_duplicates().to_list()}...")
875
+ print(f"Calculating statistics for all games in the provided play-by-play data at {game_strength} for {type}s...\nSeasons included: {pbp['season'].drop_duplicates().to_list()}...")
646
876
  start = time.perf_counter()
647
877
 
648
- #Add extra data and apply team changes
649
- pbp = prep_xG_data(pbp).replace(convert_team_abbr)
650
-
651
878
  #Check if xG column exists and apply model if it does not
652
879
  try:
653
880
  pbp['xG']
654
- except KeyError:
655
- if xg == 'wsba':
656
- pbp = wsba_xG(pbp)
657
- else:
658
- pbp = moneypuck_xG(pbp)
881
+ except KeyError:
882
+ pbp = wsba_xG(pbp)
659
883
 
660
- #Filter by season types and remove shootouts
661
- pbp = pbp.loc[(pbp['season_type'].isin(season_types)) & (pbp['period'] < 5)]
884
+ #Filter by season types, remove shootouts, remove shots with no coordinates, and remove shots on empty nets
885
+ pbp_noshot = pbp.loc[(pbp['season_type'].isin(season_types)) & ~(pbp['event_type'].isin(fenwick_events))]
886
+
887
+ #Include everything when strengths is set to 'all'
888
+ if game_strength == 'all':
889
+ mask = ((pbp['event_type'].isin(fenwick_events)) & (pbp['empty_net']<1))
890
+ else:
891
+ mask = ((pbp['event_type'].isin(fenwick_events)) & (pbp['empty_net']<1) & (pbp['x'].notna()) & (pbp['y'].notna()))
662
892
 
893
+ pbp_shot = pbp.loc[(pbp['season_type'].isin(season_types)) & mask]
894
+
895
+ pbp = pd.concat([pbp_shot,pbp_noshot])
896
+
663
897
  #Convert all columns with player ids to float in order to avoid merging errors
664
898
  for col in get_col():
665
899
  if "_id" in col:
666
900
  try: pbp[col] = pbp[col].astype(float)
667
901
  except KeyError: continue
668
902
 
669
- # Filter by game strength if not "all"
670
- if game_strength != "all":
671
- pbp = pbp.loc[pbp['strength_state'].isin(game_strength)]
903
+ #Split by game if specified
904
+ if split_game:
905
+ second_group = ['season','game_id']
906
+ else:
907
+ second_group = ['season']
672
908
 
673
909
  #Split calculation
674
- if type == 'team':
675
- complete = calc_team(pbp)
910
+ if type == 'goalie':
911
+ complete = calc_goalie(pbp,game_strength,second_group)
676
912
 
677
913
  #Set TOI to minute
678
914
  complete['TOI'] = complete['TOI']/60
679
915
 
680
916
  #Add per 60 stats
681
- for stat in per_sixty[7:13]:
917
+ for stat in ['FF','FA','xGF','xGA','GF','GA','CF','CA','GSAx']:
682
918
  complete[f'{stat}/60'] = (complete[stat]/complete['TOI'])*60
919
+
920
+ complete['GF%'] = complete['GF']/(complete['GF']+complete['GA'])
921
+ complete['xGF%'] = complete['xGF']/(complete['xGF']+complete['xGA'])
922
+ complete['FF%'] = complete['FF']/(complete['FF']+complete['FA'])
923
+ complete['CF%'] = complete['CF']/(complete['CF']+complete['CA'])
924
+
925
+ #Remove entries with no ID listed
926
+ complete = complete.loc[complete['ID'].notna()]
927
+
928
+ #Import rosters and player info
929
+ rosters = pd.read_csv(roster_path)
930
+ names = rosters[['id','fullName',
931
+ 'headshot','positionCode','shootsCatches',
932
+ 'heightInInches','weightInPounds',
933
+ 'birthDate','birthCountry']].drop_duplicates(subset=['id','fullName'],keep='last')
934
+
935
+ #Add names
936
+ complete = pd.merge(complete,names,how='left',left_on='ID',right_on='id')
937
+
938
+ #Rename if there are no missing names
939
+ complete = complete.rename(columns={'fullName':'Goalie',
940
+ 'headshot':'Headshot',
941
+ 'positionCode':'Position',
942
+ 'shootsCatches':'Handedness',
943
+ 'heightInInches':'Height (in)',
944
+ 'weightInPounds':'Weight (lbs)',
945
+ 'birthDate':'Birthday',
946
+ 'birthCountry':'Nationality'})
947
+
948
+ #WSBA
949
+ complete['WSBA'] = complete['Goalie']+complete['Team']+complete['Season'].astype(str)
950
+
951
+ #Add player age
952
+ complete['Birthday'] = pd.to_datetime(complete['Birthday'])
953
+ complete['season_year'] = complete['Season'].astype(str).str[4:8].astype(int)
954
+ complete['Age'] = complete['season_year'] - complete['Birthday'].dt.year
955
+
956
+ #Find player headshot
957
+ complete['Headshot'] = 'https://assets.nhle.com/mugs/nhl/'+complete['Season'].astype(str)+'/'+complete['Team']+'/'+complete['ID'].astype(int).astype(str)+'.png'
683
958
 
684
959
  end = time.perf_counter()
685
960
  length = end-start
686
961
  print(f'...finished in {(length if length <60 else length/60):.2f} {'seconds' if length <60 else 'minutes'}.')
687
- #Apply shot impacts if necessary (Note: this will remove skaters with fewer than 150 minutes of TOI due to the shot impact TOI rule)
962
+
963
+ head = ['Goalie','ID','Game'] if 'Game' in complete.columns else ['Goalie','ID']
964
+ complete = complete[head+[
965
+ "Season","Team",'WSBA',
966
+ 'Headshot','Position','Handedness',
967
+ 'Height (in)','Weight (lbs)',
968
+ 'Birthday','Age','Nationality',
969
+ 'GP','TOI',
970
+ "GF","FF","xGF","xGF/FF","GF/xGF","FshF%",
971
+ "GA","FA","xGA","xGA/FA","GA/xGA","FshA%",
972
+ 'CF','CA',
973
+ 'GSAx',
974
+ 'RushF','RushA','RushFxG','RushAxG','RushFG','RushAG'
975
+ ]+[f'{stat}/60' for stat in ['FF','FA','xGF','xGA','GF','GA','CF','CA','GSAx']]]
976
+
977
+ #Apply shot impacts if necessary
688
978
  if shot_impact:
689
- return nhl_shooting_impacts(complete,True)
690
- else:
691
- return complete
979
+ complete = nhl_shooting_impacts(complete,'goalie')
980
+
981
+ end = time.perf_counter()
982
+ length = end-start
983
+ print(f'...finished in {(length if length <60 else length/60):.2f} {'seconds' if length <60 else 'minutes'}.')
984
+
985
+ return complete
986
+
987
+ elif type == 'team':
988
+ complete = calc_team(pbp,game_strength,second_group)
989
+
990
+ #WSBA
991
+ complete['WSBA'] = complete['Team']+complete['Season'].astype(str)
992
+
993
+ #Set TOI to minute
994
+ complete['TOI'] = complete['TOI']/60
995
+
996
+ #Add per 60 stats
997
+ for stat in per_sixty[10:len(per_sixty)]:
998
+ complete[f'{stat}/60'] = (complete[stat]/complete['TOI'])*60
999
+
1000
+ complete['GF%'] = complete['GF']/(complete['GF']+complete['GA'])
1001
+ complete['xGF%'] = complete['xGF']/(complete['xGF']+complete['xGA'])
1002
+ complete['FF%'] = complete['FF']/(complete['FF']+complete['FA'])
1003
+ complete['CF%'] = complete['CF']/(complete['CF']+complete['CA'])
1004
+
1005
+ head = ['Team','Game'] if 'Game' in complete.columns else ['Team']
1006
+ complete = complete[head+[
1007
+ 'Season','WSBA',
1008
+ 'GP','TOI',
1009
+ "GF","FF","xGF","xGF/FF","GF/xGF","FshF%",
1010
+ "GA","FA","xGA","xGA/FA","GA/xGA","FshA%",
1011
+ 'CF','CA',
1012
+ 'GF%','FF%','xGF%','CF%',
1013
+ 'HF','HA','HF%',
1014
+ 'Penl','Penl2','Penl5','PIM','Draw','PENL%',
1015
+ 'Give','Take','PM%',
1016
+ 'Block',
1017
+ 'RushF','RushA','RushFxG','RushAxG','RushFG','RushAG'
1018
+ ]+[f'{stat}/60' for stat in per_sixty[10:len(per_sixty)]]]
1019
+ #Apply shot impacts if necessary
1020
+ if shot_impact:
1021
+ complete = nhl_shooting_impacts(complete,'team')
1022
+
1023
+ end = time.perf_counter()
1024
+ length = end-start
1025
+ print(f'...finished in {(length if length <60 else length/60):.2f} {'seconds' if length <60 else 'minutes'}.')
1026
+
1027
+ return complete
692
1028
  else:
693
- indv_stats = calc_indv(pbp)
694
- onice_stats = calc_onice(pbp)
1029
+ indv_stats = calc_indv(pbp,game_strength,second_group)
1030
+ onice_stats = calc_onice(pbp,game_strength,second_group)
695
1031
 
696
1032
  #IDs sometimes set as objects
697
1033
  indv_stats['ID'] = indv_stats['ID'].astype(float)
698
1034
  onice_stats['ID'] = onice_stats['ID'].astype(float)
699
1035
 
700
1036
  #Merge and add columns for extra stats
701
- complete = pd.merge(indv_stats,onice_stats,how="outer",on=['ID','Team','Season'])
1037
+ complete = pd.merge(indv_stats,onice_stats,how="outer",on=['ID','Team','Season']+(['Game'] if 'game_id' in second_group else []))
702
1038
  complete['GC%'] = complete['Gi']/complete['GF']
703
1039
  complete['AC%'] = (complete['A1']+complete['A2'])/complete['GF']
704
1040
  complete['GI%'] = (complete['Gi']+complete['A1']+complete['A2'])/complete['GF']
705
1041
  complete['FC%'] = complete['Fi']/complete['FF']
706
1042
  complete['xGC%'] = complete['xGi']/complete['xGF']
1043
+ complete['GF%'] = complete['GF']/(complete['GF']+complete['GA'])
1044
+ complete['xGF%'] = complete['xGF']/(complete['xGF']+complete['xGA'])
1045
+ complete['FF%'] = complete['FF']/(complete['FF']+complete['FA'])
1046
+ complete['CF%'] = complete['CF']/(complete['CF']+complete['CA'])
707
1047
 
708
1048
  #Remove entries with no ID listed
709
1049
  complete = complete.loc[complete['ID'].notna()]
@@ -731,10 +1071,6 @@ def nhl_calculate_stats(pbp,type,season_types,game_strength,roster_path="rosters
731
1071
  #Set TOI to minute
732
1072
  complete['TOI'] = complete['TOI']/60
733
1073
 
734
- #Add per 60 stats
735
- for stat in per_sixty:
736
- complete[f'{stat}/60'] = (complete[stat]/complete['TOI'])*60
737
-
738
1074
  #Add player age
739
1075
  complete['Birthday'] = pd.to_datetime(complete['Birthday'])
740
1076
  complete['season_year'] = complete['Season'].astype(str).str[4:8].astype(int)
@@ -743,42 +1079,56 @@ def nhl_calculate_stats(pbp,type,season_types,game_strength,roster_path="rosters
743
1079
  #Find player headshot
744
1080
  complete['Headshot'] = 'https://assets.nhle.com/mugs/nhl/'+complete['Season'].astype(str)+'/'+complete['Team']+'/'+complete['ID'].astype(int).astype(str)+'.png'
745
1081
 
746
- end = time.perf_counter()
747
- length = end-start
748
1082
  #Remove goalies that occasionally appear in a set
749
1083
  complete = complete.loc[complete['Position']!='G']
750
1084
  #Add WSBA ID
751
1085
  complete['WSBA'] = complete['Player']+complete['Season'].astype(str)+complete['Team']
752
1086
 
1087
+ #Add per 60 stats
1088
+ for stat in per_sixty:
1089
+ complete[f'{stat}/60'] = (complete[stat]/complete['TOI'])*60
1090
+
753
1091
  #Shot Type Metrics
754
1092
  type_metrics = []
755
1093
  for type in shot_types:
756
1094
  for stat in per_sixty[:3]:
757
1095
  type_metrics.append(f'{type.capitalize()}{stat}')
758
1096
 
759
- complete = complete[[
760
- 'Player','ID',
1097
+ head = ['Player','ID','Game'] if 'Game' in complete.columns else ['Player','ID']
1098
+ complete = complete[head+[
761
1099
  "Season","Team",'WSBA',
762
1100
  'Headshot','Position','Handedness',
763
1101
  'Height (in)','Weight (lbs)',
764
1102
  'Birthday','Age','Nationality',
765
1103
  'GP','TOI',
766
1104
  "Gi","A1","A2",'P1','P',
1105
+ 'Give','Take','PM%','HF','HA','HF%',
767
1106
  "Fi","xGi",'xGi/Fi',"Gi/xGi","Fshi%",
768
1107
  "GF","FF","xGF","xGF/FF","GF/xGF","FshF%",
769
1108
  "GA","FA","xGA","xGA/FA","GA/xGA","FshA%",
1109
+ 'Ci','CF','CA','CF%',
1110
+ 'FF%','xGF%','GF%',
770
1111
  'Rush',"Rush xG",'Rush G',"GC%","AC%","GI%","FC%","xGC%",
1112
+ 'F','FW','FL','F%',
1113
+ 'Penl','Penl2','Penl5',
1114
+ 'Draw','PIM','PENL%',
1115
+ 'Block',
1116
+ 'OZF','NZF','DZF',
1117
+ 'OZF%','NZF%','DZF%',
771
1118
  ]+[f'{stat}/60' for stat in per_sixty]+type_metrics].fillna(0).sort_values(['Player','Season','Team','ID'])
772
1119
 
773
- print(f'...finished in {(length if length <60 else length/60):.2f} {'seconds' if length <60 else 'minutes'}.')
774
1120
  #Apply shot impacts if necessary (Note: this will remove skaters with fewer than 150 minutes of TOI due to the shot impact TOI rule)
775
1121
  if shot_impact:
776
- return nhl_shooting_impacts(complete,False)
777
- else:
778
- return complete
1122
+ complete = nhl_shooting_impacts(complete,'skater')
1123
+
1124
+ end = time.perf_counter()
1125
+ length = end-start
1126
+ print(f'...finished in {(length if length <60 else length/60):.2f} {'seconds' if length <60 else 'minutes'}.')
779
1127
 
780
- def nhl_plot_skaters_shots(pbp,skater_dict,strengths,marker_dict=event_markers,onice = 'indv',title = True,legend=False,xg='moneypuck'):
781
- #Returns list of plots for specified skaters
1128
+ return complete
1129
+
1130
+ def nhl_plot_skaters_shots(pbp,skater_dict,strengths,marker_dict=event_markers,onice = 'indv',title = True,legend=False):
1131
+ #Returns dict of plots for specified skaters
782
1132
  # param 'pbp' - pbp to plot data
783
1133
  # param 'skater_dict' - skaters to plot shots for (format: {'Patrice Bergeron':['20242025','BOS']})
784
1134
  # param 'strengths' - strengths to include in plotting
@@ -790,18 +1140,19 @@ def nhl_plot_skaters_shots(pbp,skater_dict,strengths,marker_dict=event_markers,o
790
1140
 
791
1141
  print(f'Plotting the following skater shots: {skater_dict}...')
792
1142
 
793
- #Iterate through games, adding plot to list
794
- skater_plots = []
1143
+ #Iterate through skaters, adding plots to dict
1144
+ skater_plots = {}
795
1145
  for skater in skater_dict.keys():
796
1146
  skater_info = skater_dict[skater]
797
1147
  title = f'{skater} Fenwick Shots for {skater_info[1]} in {skater_info[0][2:4]}-{skater_info[0][6:8]}' if title else ''
798
- skater_plots.append(plot_skater_shots(pbp,skater,skater_info[0],skater_info[1],strengths,title,marker_dict,onice,legend,xg))
1148
+ #Key is formatted as PLAYERSEASONTEAM (i.e. PATRICE BERGERON20212022BOS)
1149
+ skater_plots.update({f'{skater}{skater_info[0]}{skater_info[1]}':[plot_skater_shots(pbp,skater,skater_info[0],skater_info[1],strengths,title,marker_dict,onice,legend)]})
799
1150
 
800
1151
  #Return: list of plotted skater shot charts
801
1152
  return skater_plots
802
1153
 
803
- def nhl_plot_games(pbp,events,strengths,game_ids='all',marker_dict=event_markers,team_colors={'away':'primary','home':'primary'},legend=False,xg='moneypuck'):
804
- #Returns list of plots for specified games
1154
+ def nhl_plot_games(pbp,events,strengths,game_ids='all',marker_dict=event_markers,team_colors={'away':'primary','home':'primary'},legend=False):
1155
+ #Returns dict of plots for specified games
805
1156
  # param 'pbp' - pbp to plot data
806
1157
  # param 'events' - type of events to plot
807
1158
  # param 'strengths' - strengths to include in plotting
@@ -816,8 +1167,10 @@ def nhl_plot_games(pbp,events,strengths,game_ids='all',marker_dict=event_markers
816
1167
 
817
1168
  print(f'Plotting the following games: {game_ids}...')
818
1169
 
819
- #Iterate through games, adding plot to list
820
- game_plots = [plot_game_events(pbp,game,events,strengths,marker_dict,team_colors,legend,xg) for game in game_ids]
1170
+ game_plots = {}
1171
+ #Iterate through games, adding plot to dict
1172
+ for game in game_ids:
1173
+ game_plots.update({game:[plot_game_events(pbp,game,events,strengths,marker_dict,team_colors,legend)]})
821
1174
 
822
1175
  #Return: list of plotted game events
823
1176
  return game_plots
@@ -853,7 +1206,7 @@ def repo_load_pbp(seasons = []):
853
1206
 
854
1207
  #Add parquet to total
855
1208
  print(f'Loading play-by-play from the following seasons: {seasons}...')
856
- dfs = [pd.read_parquet(f"https://github.com/owensingh38/wsba_hockey/raw/refs/heads/main/src/wsba_hockey/pbp/parquet/nhl_pbp_{season}.parquet") for season in seasons]
1209
+ dfs = [pd.read_parquet(f"https://f005.backblazeb2.com/file/weakside-breakout/pbp/{season}.parquet") for season in seasons]
857
1210
 
858
1211
  return pd.concat(dfs)
859
1212
 
@@ -861,9 +1214,3 @@ def repo_load_seasons():
861
1214
  #List of available seasons to scrape
862
1215
 
863
1216
  return seasons
864
-
865
- def admin_convert_to_parquet(seasons):
866
- for season in seasons:
867
- load = pd.read_csv(f'pbp/csv/nhl_pbp_{season}.csv')
868
-
869
- load.to_parquet(f'pbp/parquet/nhl_pbp_{season}.parquet',index=False)