wsba-hockey 1.0.3__py3-none-any.whl → 1.0.4__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (130) hide show
  1. wsba_hockey/data_pipelines.py +183 -0
  2. wsba_hockey/evidence/weakside-breakout/node_modules/duckdb/vendor.py +146 -0
  3. wsba_hockey/evidence/weakside-breakout/node_modules/flatted/python/flatted.py +149 -0
  4. wsba_hockey/evidence/weakside-breakout/node_modules/flatted/python/test.py +63 -0
  5. wsba_hockey/evidence/weakside-breakout/node_modules/node-gyp/gyp/gyp_main.py +45 -0
  6. wsba_hockey/evidence/weakside-breakout/node_modules/node-gyp/gyp/pylib/gyp/MSVSNew.py +367 -0
  7. wsba_hockey/evidence/weakside-breakout/node_modules/node-gyp/gyp/pylib/gyp/MSVSProject.py +206 -0
  8. wsba_hockey/evidence/weakside-breakout/node_modules/node-gyp/gyp/pylib/gyp/MSVSSettings.py +1270 -0
  9. wsba_hockey/evidence/weakside-breakout/node_modules/node-gyp/gyp/pylib/gyp/MSVSSettings_test.py +1547 -0
  10. wsba_hockey/evidence/weakside-breakout/node_modules/node-gyp/gyp/pylib/gyp/MSVSToolFile.py +59 -0
  11. wsba_hockey/evidence/weakside-breakout/node_modules/node-gyp/gyp/pylib/gyp/MSVSUserFile.py +153 -0
  12. wsba_hockey/evidence/weakside-breakout/node_modules/node-gyp/gyp/pylib/gyp/MSVSUtil.py +271 -0
  13. wsba_hockey/evidence/weakside-breakout/node_modules/node-gyp/gyp/pylib/gyp/MSVSVersion.py +574 -0
  14. wsba_hockey/evidence/weakside-breakout/node_modules/node-gyp/gyp/pylib/gyp/__init__.py +690 -0
  15. wsba_hockey/evidence/weakside-breakout/node_modules/node-gyp/gyp/pylib/gyp/common.py +661 -0
  16. wsba_hockey/evidence/weakside-breakout/node_modules/node-gyp/gyp/pylib/gyp/common_test.py +78 -0
  17. wsba_hockey/evidence/weakside-breakout/node_modules/node-gyp/gyp/pylib/gyp/easy_xml.py +165 -0
  18. wsba_hockey/evidence/weakside-breakout/node_modules/node-gyp/gyp/pylib/gyp/easy_xml_test.py +109 -0
  19. wsba_hockey/evidence/weakside-breakout/node_modules/node-gyp/gyp/pylib/gyp/flock_tool.py +55 -0
  20. wsba_hockey/evidence/weakside-breakout/node_modules/node-gyp/gyp/pylib/gyp/generator/__init__.py +0 -0
  21. wsba_hockey/evidence/weakside-breakout/node_modules/node-gyp/gyp/pylib/gyp/generator/analyzer.py +808 -0
  22. wsba_hockey/evidence/weakside-breakout/node_modules/node-gyp/gyp/pylib/gyp/generator/android.py +1173 -0
  23. wsba_hockey/evidence/weakside-breakout/node_modules/node-gyp/gyp/pylib/gyp/generator/cmake.py +1321 -0
  24. wsba_hockey/evidence/weakside-breakout/node_modules/node-gyp/gyp/pylib/gyp/generator/compile_commands_json.py +120 -0
  25. wsba_hockey/evidence/weakside-breakout/node_modules/node-gyp/gyp/pylib/gyp/generator/dump_dependency_json.py +103 -0
  26. wsba_hockey/evidence/weakside-breakout/node_modules/node-gyp/gyp/pylib/gyp/generator/eclipse.py +464 -0
  27. wsba_hockey/evidence/weakside-breakout/node_modules/node-gyp/gyp/pylib/gyp/generator/gypd.py +89 -0
  28. wsba_hockey/evidence/weakside-breakout/node_modules/node-gyp/gyp/pylib/gyp/generator/gypsh.py +58 -0
  29. wsba_hockey/evidence/weakside-breakout/node_modules/node-gyp/gyp/pylib/gyp/generator/make.py +2714 -0
  30. wsba_hockey/evidence/weakside-breakout/node_modules/node-gyp/gyp/pylib/gyp/generator/msvs.py +3981 -0
  31. wsba_hockey/evidence/weakside-breakout/node_modules/node-gyp/gyp/pylib/gyp/generator/msvs_test.py +44 -0
  32. wsba_hockey/evidence/weakside-breakout/node_modules/node-gyp/gyp/pylib/gyp/generator/ninja.py +2936 -0
  33. wsba_hockey/evidence/weakside-breakout/node_modules/node-gyp/gyp/pylib/gyp/generator/ninja_test.py +55 -0
  34. wsba_hockey/evidence/weakside-breakout/node_modules/node-gyp/gyp/pylib/gyp/generator/xcode.py +1394 -0
  35. wsba_hockey/evidence/weakside-breakout/node_modules/node-gyp/gyp/pylib/gyp/generator/xcode_test.py +25 -0
  36. wsba_hockey/evidence/weakside-breakout/node_modules/node-gyp/gyp/pylib/gyp/input.py +3130 -0
  37. wsba_hockey/evidence/weakside-breakout/node_modules/node-gyp/gyp/pylib/gyp/input_test.py +98 -0
  38. wsba_hockey/evidence/weakside-breakout/node_modules/node-gyp/gyp/pylib/gyp/mac_tool.py +771 -0
  39. wsba_hockey/evidence/weakside-breakout/node_modules/node-gyp/gyp/pylib/gyp/msvs_emulation.py +1271 -0
  40. wsba_hockey/evidence/weakside-breakout/node_modules/node-gyp/gyp/pylib/gyp/ninja_syntax.py +174 -0
  41. wsba_hockey/evidence/weakside-breakout/node_modules/node-gyp/gyp/pylib/gyp/simple_copy.py +61 -0
  42. wsba_hockey/evidence/weakside-breakout/node_modules/node-gyp/gyp/pylib/gyp/win_tool.py +374 -0
  43. wsba_hockey/evidence/weakside-breakout/node_modules/node-gyp/gyp/pylib/gyp/xcode_emulation.py +1939 -0
  44. wsba_hockey/evidence/weakside-breakout/node_modules/node-gyp/gyp/pylib/gyp/xcode_ninja.py +302 -0
  45. wsba_hockey/evidence/weakside-breakout/node_modules/node-gyp/gyp/pylib/gyp/xcodeproj_file.py +3197 -0
  46. wsba_hockey/evidence/weakside-breakout/node_modules/node-gyp/gyp/pylib/gyp/xml_fix.py +65 -0
  47. wsba_hockey/evidence/weakside-breakout/node_modules/node-gyp/gyp/test_gyp.py +261 -0
  48. wsba_hockey/evidence/weakside-breakout/node_modules/node-gyp/gyp/tools/graphviz.py +102 -0
  49. wsba_hockey/evidence/weakside-breakout/node_modules/node-gyp/gyp/tools/pretty_gyp.py +156 -0
  50. wsba_hockey/evidence/weakside-breakout/node_modules/node-gyp/gyp/tools/pretty_sln.py +181 -0
  51. wsba_hockey/evidence/weakside-breakout/node_modules/node-gyp/gyp/tools/pretty_vcproj.py +339 -0
  52. wsba_hockey/evidence/weakside-breakout/node_modules/node-gyp/test/fixtures/test-charmap.py +31 -0
  53. wsba_hockey/evidence/weakside-breakout/node_modules/node-gyp/update-gyp.py +64 -0
  54. wsba_hockey/evidence/weakside-breakout/node_modules/sqlite3/node_modules/node-gyp/gyp/gyp_main.py +45 -0
  55. wsba_hockey/evidence/weakside-breakout/node_modules/sqlite3/node_modules/node-gyp/gyp/pylib/gyp/MSVSNew.py +367 -0
  56. wsba_hockey/evidence/weakside-breakout/node_modules/sqlite3/node_modules/node-gyp/gyp/pylib/gyp/MSVSProject.py +206 -0
  57. wsba_hockey/evidence/weakside-breakout/node_modules/sqlite3/node_modules/node-gyp/gyp/pylib/gyp/MSVSSettings.py +1270 -0
  58. wsba_hockey/evidence/weakside-breakout/node_modules/sqlite3/node_modules/node-gyp/gyp/pylib/gyp/MSVSSettings_test.py +1547 -0
  59. wsba_hockey/evidence/weakside-breakout/node_modules/sqlite3/node_modules/node-gyp/gyp/pylib/gyp/MSVSToolFile.py +59 -0
  60. wsba_hockey/evidence/weakside-breakout/node_modules/sqlite3/node_modules/node-gyp/gyp/pylib/gyp/MSVSUserFile.py +153 -0
  61. wsba_hockey/evidence/weakside-breakout/node_modules/sqlite3/node_modules/node-gyp/gyp/pylib/gyp/MSVSUtil.py +271 -0
  62. wsba_hockey/evidence/weakside-breakout/node_modules/sqlite3/node_modules/node-gyp/gyp/pylib/gyp/MSVSVersion.py +574 -0
  63. wsba_hockey/evidence/weakside-breakout/node_modules/sqlite3/node_modules/node-gyp/gyp/pylib/gyp/__init__.py +666 -0
  64. wsba_hockey/evidence/weakside-breakout/node_modules/sqlite3/node_modules/node-gyp/gyp/pylib/gyp/common.py +654 -0
  65. wsba_hockey/evidence/weakside-breakout/node_modules/sqlite3/node_modules/node-gyp/gyp/pylib/gyp/common_test.py +78 -0
  66. wsba_hockey/evidence/weakside-breakout/node_modules/sqlite3/node_modules/node-gyp/gyp/pylib/gyp/easy_xml.py +165 -0
  67. wsba_hockey/evidence/weakside-breakout/node_modules/sqlite3/node_modules/node-gyp/gyp/pylib/gyp/easy_xml_test.py +109 -0
  68. wsba_hockey/evidence/weakside-breakout/node_modules/sqlite3/node_modules/node-gyp/gyp/pylib/gyp/flock_tool.py +55 -0
  69. wsba_hockey/evidence/weakside-breakout/node_modules/sqlite3/node_modules/node-gyp/gyp/pylib/gyp/generator/__init__.py +0 -0
  70. wsba_hockey/evidence/weakside-breakout/node_modules/sqlite3/node_modules/node-gyp/gyp/pylib/gyp/generator/analyzer.py +808 -0
  71. wsba_hockey/evidence/weakside-breakout/node_modules/sqlite3/node_modules/node-gyp/gyp/pylib/gyp/generator/android.py +1173 -0
  72. wsba_hockey/evidence/weakside-breakout/node_modules/sqlite3/node_modules/node-gyp/gyp/pylib/gyp/generator/cmake.py +1321 -0
  73. wsba_hockey/evidence/weakside-breakout/node_modules/sqlite3/node_modules/node-gyp/gyp/pylib/gyp/generator/compile_commands_json.py +120 -0
  74. wsba_hockey/evidence/weakside-breakout/node_modules/sqlite3/node_modules/node-gyp/gyp/pylib/gyp/generator/dump_dependency_json.py +103 -0
  75. wsba_hockey/evidence/weakside-breakout/node_modules/sqlite3/node_modules/node-gyp/gyp/pylib/gyp/generator/eclipse.py +464 -0
  76. wsba_hockey/evidence/weakside-breakout/node_modules/sqlite3/node_modules/node-gyp/gyp/pylib/gyp/generator/gypd.py +89 -0
  77. wsba_hockey/evidence/weakside-breakout/node_modules/sqlite3/node_modules/node-gyp/gyp/pylib/gyp/generator/gypsh.py +58 -0
  78. wsba_hockey/evidence/weakside-breakout/node_modules/sqlite3/node_modules/node-gyp/gyp/pylib/gyp/generator/make.py +2518 -0
  79. wsba_hockey/evidence/weakside-breakout/node_modules/sqlite3/node_modules/node-gyp/gyp/pylib/gyp/generator/msvs.py +3978 -0
  80. wsba_hockey/evidence/weakside-breakout/node_modules/sqlite3/node_modules/node-gyp/gyp/pylib/gyp/generator/msvs_test.py +44 -0
  81. wsba_hockey/evidence/weakside-breakout/node_modules/sqlite3/node_modules/node-gyp/gyp/pylib/gyp/generator/ninja.py +2936 -0
  82. wsba_hockey/evidence/weakside-breakout/node_modules/sqlite3/node_modules/node-gyp/gyp/pylib/gyp/generator/ninja_test.py +55 -0
  83. wsba_hockey/evidence/weakside-breakout/node_modules/sqlite3/node_modules/node-gyp/gyp/pylib/gyp/generator/xcode.py +1394 -0
  84. wsba_hockey/evidence/weakside-breakout/node_modules/sqlite3/node_modules/node-gyp/gyp/pylib/gyp/generator/xcode_test.py +25 -0
  85. wsba_hockey/evidence/weakside-breakout/node_modules/sqlite3/node_modules/node-gyp/gyp/pylib/gyp/input.py +3137 -0
  86. wsba_hockey/evidence/weakside-breakout/node_modules/sqlite3/node_modules/node-gyp/gyp/pylib/gyp/input_test.py +98 -0
  87. wsba_hockey/evidence/weakside-breakout/node_modules/sqlite3/node_modules/node-gyp/gyp/pylib/gyp/mac_tool.py +771 -0
  88. wsba_hockey/evidence/weakside-breakout/node_modules/sqlite3/node_modules/node-gyp/gyp/pylib/gyp/msvs_emulation.py +1271 -0
  89. wsba_hockey/evidence/weakside-breakout/node_modules/sqlite3/node_modules/node-gyp/gyp/pylib/gyp/ninja_syntax.py +174 -0
  90. wsba_hockey/evidence/weakside-breakout/node_modules/sqlite3/node_modules/node-gyp/gyp/pylib/gyp/simple_copy.py +61 -0
  91. wsba_hockey/evidence/weakside-breakout/node_modules/sqlite3/node_modules/node-gyp/gyp/pylib/gyp/win_tool.py +374 -0
  92. wsba_hockey/evidence/weakside-breakout/node_modules/sqlite3/node_modules/node-gyp/gyp/pylib/gyp/xcode_emulation.py +1939 -0
  93. wsba_hockey/evidence/weakside-breakout/node_modules/sqlite3/node_modules/node-gyp/gyp/pylib/gyp/xcode_ninja.py +302 -0
  94. wsba_hockey/evidence/weakside-breakout/node_modules/sqlite3/node_modules/node-gyp/gyp/pylib/gyp/xcodeproj_file.py +3197 -0
  95. wsba_hockey/evidence/weakside-breakout/node_modules/sqlite3/node_modules/node-gyp/gyp/pylib/gyp/xml_fix.py +65 -0
  96. wsba_hockey/evidence/weakside-breakout/node_modules/sqlite3/node_modules/node-gyp/gyp/setup.py +42 -0
  97. wsba_hockey/evidence/weakside-breakout/node_modules/sqlite3/node_modules/node-gyp/gyp/test_gyp.py +260 -0
  98. wsba_hockey/evidence/weakside-breakout/node_modules/sqlite3/node_modules/node-gyp/gyp/tools/graphviz.py +102 -0
  99. wsba_hockey/evidence/weakside-breakout/node_modules/sqlite3/node_modules/node-gyp/gyp/tools/pretty_gyp.py +156 -0
  100. wsba_hockey/evidence/weakside-breakout/node_modules/sqlite3/node_modules/node-gyp/gyp/tools/pretty_sln.py +181 -0
  101. wsba_hockey/evidence/weakside-breakout/node_modules/sqlite3/node_modules/node-gyp/gyp/tools/pretty_vcproj.py +339 -0
  102. wsba_hockey/evidence/weakside-breakout/node_modules/sqlite3/node_modules/node-gyp/test/fixtures/test-charmap.py +31 -0
  103. wsba_hockey/evidence/weakside-breakout/node_modules/sqlite3/node_modules/node-gyp/update-gyp.py +46 -0
  104. wsba_hockey/evidence/weakside-breakout/wsba_nhl_apps/wsba_nhl_apps/game_stats/app.py +401 -0
  105. wsba_hockey/evidence/weakside-breakout/wsba_nhl_apps/wsba_nhl_apps/game_stats/name_fix.py +47 -0
  106. wsba_hockey/evidence/weakside-breakout/wsba_nhl_apps/wsba_nhl_apps/heatmaps/app.py +108 -0
  107. wsba_hockey/evidence/weakside-breakout/wsba_nhl_apps/wsba_nhl_apps/heatmaps/plot.py +93 -0
  108. wsba_hockey/evidence/weakside-breakout/wsba_nhl_apps/wsba_nhl_apps/heatmaps/rink_plot.py +245 -0
  109. wsba_hockey/evidence/weakside-breakout/wsba_nhl_apps/wsba_nhl_apps/matchups/app.py +145 -0
  110. wsba_hockey/evidence/weakside-breakout/wsba_nhl_apps/wsba_nhl_apps/matchups/plot.py +77 -0
  111. wsba_hockey/evidence/weakside-breakout/wsba_nhl_apps/wsba_nhl_apps/matchups/rink_plot.py +245 -0
  112. wsba_hockey/evidence/weakside-breakout/wsba_nhl_apps/wsba_nhl_apps/pbp/app.py +389 -0
  113. wsba_hockey/evidence/weakside-breakout/wsba_nhl_apps/wsba_nhl_apps/pbp/plot.py +70 -0
  114. wsba_hockey/evidence/weakside-breakout/wsba_nhl_apps/wsba_nhl_apps/pbp/rink_plot.py +245 -0
  115. wsba_hockey/evidence/weakside-breakout/wsba_nhl_apps/wsba_nhl_apps/skater/app.py +110 -0
  116. wsba_hockey/evidence/weakside-breakout/wsba_nhl_apps/wsba_nhl_apps/skater/plot.py +58 -0
  117. wsba_hockey/evidence/weakside-breakout/wsba_nhl_apps/wsba_nhl_apps/skater/rink_plot.py +245 -0
  118. wsba_hockey/tools/agg.py +242 -53
  119. wsba_hockey/tools/plotting.py +12 -17
  120. wsba_hockey/tools/scraping.py +149 -258
  121. wsba_hockey/tools/xg_model.py +357 -311
  122. wsba_hockey/workspace.py +22 -117
  123. wsba_hockey/wsba_main.py +493 -165
  124. {wsba_hockey-1.0.3.dist-info → wsba_hockey-1.0.4.dist-info}/METADATA +1 -1
  125. wsba_hockey-1.0.4.dist-info/RECORD +135 -0
  126. {wsba_hockey-1.0.3.dist-info → wsba_hockey-1.0.4.dist-info}/WHEEL +1 -1
  127. wsba_hockey/stats/calculate_viz/shot_impact.py +0 -2
  128. wsba_hockey-1.0.3.dist-info/RECORD +0 -19
  129. {wsba_hockey-1.0.3.dist-info → wsba_hockey-1.0.4.dist-info}/licenses/LICENSE +0 -0
  130. {wsba_hockey-1.0.3.dist-info → wsba_hockey-1.0.4.dist-info}/top_level.txt +0 -0
wsba_hockey/wsba_main.py CHANGED
@@ -1,13 +1,12 @@
1
1
  import requests as rs
2
2
  import pandas as pd
3
- import numpy as np
4
- from datetime import datetime, timedelta, date
5
3
  import time
6
4
  import random
7
- from .tools.scraping import *
8
- from .tools.xg_model import *
9
- from .tools.agg import *
10
- from .tools.plotting import *
5
+ from datetime import datetime, timedelta, date
6
+ from tools.scraping import *
7
+ from tools.xg_model import *
8
+ from tools.agg import *
9
+ from tools.plotting import *
11
10
 
12
11
  ### WSBA HOCKEY ###
13
12
  ## Provided below are all integral functions in the WSBA Hockey Python package. ##
@@ -59,7 +58,7 @@ convert_team_abbr = {'L.A':'LAK',
59
58
  'T.B':'TBL',
60
59
  'PHX':'ARI'}
61
60
 
62
- per_sixty = ['Fi','xGi','Gi','A1','A2','P1','P','FF','FA','xGF','xGA','GF','GA']
61
+ per_sixty = ['Fi','xGi','Gi','A1','A2','P1','P','OZF','NZF','DZF','FF','FA','xGF','xGA','GF','GA','CF','CA','HF','HA','Give','Take','Penl','Penl2','Penl5','Draw','Block']
63
62
 
64
63
  #Some games in the API are specifically known to cause errors in scraping.
65
64
  #This list is updated as frequently as necessary
@@ -73,15 +72,12 @@ known_probs ={
73
72
  '2009020658':'Missing shifts data for game between New York Islanders and Dallas.',
74
73
  '2009020885':'Missing shifts data for game between Sharks and Blue Jackets.',
75
74
  '2010020124':'Game between Capitals and Hurricanes is sporadically missing player on-ice data',
75
+ '2012020018':'HTML events contain mislabeled events.',
76
76
  '2013020971':'On March 10th, 2014, Stars forward Rich Peverley suffered from a cardiac episode midgame and as a result, the remainder of the game was postponed. \nThe game resumed on April 9th, and the only goal scorer in the game, Blue Jackets forward Nathan Horton, did not appear in the resumed game due to injury. Interestingly, Horton would never play in the NHL again.',
77
77
  '2018021133':'Game between Lightning and Capitals has incorrectly labeled event teams (i.e. WSH TAKEAWAY - #71 CIRELLI (Cirelli is a Tampa Bay skater in this game)).',
78
78
  '2019020876':'Due to the frightening collapse of Blues defensemen Jay Bouwmeester, a game on February 2nd, 2020 between the Ducks and Blues was postponed. \nWhen the game resumed, Ducks defensemen Hampus Lindholm, who assisted on a goal in the inital game, did not play in the resumed match.'
79
79
  }
80
80
 
81
- name_change = {
82
- "":"",
83
- }
84
-
85
81
  shot_types = ['wrist','deflected','tip-in','slap','backhand','snap','wrap-around','poke','bat','cradle','between-legs']
86
82
 
87
83
  new = 2024
@@ -107,14 +103,17 @@ standings_end = {
107
103
  '20242025':'04-17'
108
104
  }
109
105
 
106
+ events = ['faceoff','hit','giveaway','takeaway','blocked-shot','missed-shot','shot-on-goal','goal','penalty']
107
+
110
108
  ## SCRAPE FUNCTIONS ##
111
- def nhl_scrape_game(game_ids,split_shifts = False, remove = ['period-start','period-end','challenge','stoppage'],verbose = False, errors = False):
109
+ def nhl_scrape_game(game_ids,split_shifts = False, remove = ['period-start','period-end','challenge','stoppage','shootout-complete','game-end'],verbose = False, sources = False, errors = False):
112
110
  #Given a set of game_ids (NHL API), return complete play-by-play information as requested
113
111
  # param 'game_ids' - NHL game ids (or list formatted as ['random', num_of_games, start_year, end_year])
114
112
  # param 'split_shifts' - boolean which splits pbp and shift events if true
115
113
  # param 'remove' - list of events to remove from final dataframe
116
114
  # param 'xg' - xG model to apply to pbp for aggregation
117
115
  # param 'verbose' - boolean which adds additional event info if true
116
+ # param 'sources' - boolean scraping the html and json sources to a master directory if true
118
117
  # param 'errors' - boolean returning game ids which did not scrape if true
119
118
 
120
119
  pbps = []
@@ -154,6 +153,7 @@ def nhl_scrape_game(game_ids,split_shifts = False, remove = ['period-start','per
154
153
  #Scrape each game
155
154
  #Track Errors
156
155
  error_ids = []
156
+ prog = 0
157
157
  for game_id in game_ids:
158
158
  print("Scraping data from game " + str(game_id) + "...",end="")
159
159
  start = time.perf_counter()
@@ -161,15 +161,25 @@ def nhl_scrape_game(game_ids,split_shifts = False, remove = ['period-start','per
161
161
  try:
162
162
  #Retrieve data
163
163
  info = get_game_info(game_id)
164
- data = combine_data(info)
164
+ data = combine_data(info, sources)
165
165
 
166
166
  #Append data to list
167
167
  pbps.append(data)
168
168
 
169
169
  end = time.perf_counter()
170
170
  secs = end - start
171
- print(f" finished in {secs:.2f} seconds.")
171
+ prog += 1
172
+
173
+ #Export if sources is true
174
+ if sources:
175
+ dirs = f'sources/{info['season']}/'
176
+
177
+ if not os.path.exists(dirs):
178
+ os.makedirs(dirs)
172
179
 
180
+ data.to_csv(f'{dirs}{info['game_id']}.csv',index=False)
181
+
182
+ print(f" finished in {secs:.2f} seconds. {prog}/{len(game_ids)} ({(prog/len(game_ids))*100:.2f}%)")
173
183
  except:
174
184
  #Games such as the all-star game and pre-season games will incur this error
175
185
  #Other games have known problems
@@ -180,7 +190,7 @@ def nhl_scrape_game(game_ids,split_shifts = False, remove = ['period-start','per
180
190
 
181
191
  #Track error
182
192
  error_ids.append(game_id)
183
-
193
+
184
194
  #Add all pbps together
185
195
  if len(pbps) == 0:
186
196
  print("\rNo data returned.")
@@ -256,30 +266,37 @@ def nhl_scrape_schedule(season,start = "09-01", end = "08-01"):
256
266
  #Handles dates which are over a year apart
257
267
  day = 365 + day
258
268
  for i in range(day):
259
- #For each day, call NHL api and retreive id, season, season_type (1,2,3), and gamecenter link
269
+ #For each day, call NHL api and retrieve info on all games of the selected day
260
270
  inc = start+timedelta(days=i)
261
271
  print("Scraping games on " + str(inc)[:10]+"...")
262
272
 
263
273
  get = rs.get(api+str(inc)[:10]).json()
264
- gameWeek = list(pd.json_normalize(get['gameWeek'])['games'])[0]
265
-
266
- for i in range(0,len(gameWeek)):
267
- game.append(pd.DataFrame({
268
- "id": [gameWeek[i]['id']],
269
- "season": [gameWeek[i]['season']],
270
- "season_type":[gameWeek[i]['gameType']],
271
- "away_team_abbr":[gameWeek[i]['awayTeam']['abbrev']],
272
- "home_team_abbr":[gameWeek[i]['homeTeam']['abbrev']],
273
- "gamecenter_link":[gameWeek[i]['gameCenterLink']]
274
- }))
275
-
274
+ gameWeek = pd.json_normalize(list(pd.json_normalize(get['gameWeek'])['games'])[0])
275
+
276
+ #Return nothing if there's nothing
277
+ if gameWeek.empty:
278
+ game.append(gameWeek)
279
+ else:
280
+ gameWeek['date'] = get['gameWeek'][0]['date']
281
+
282
+ gameWeek['season_type'] = gameWeek['gameType']
283
+ gameWeek['away_team_abbr'] = gameWeek['awayTeam.abbrev']
284
+ gameWeek['home_team_abbr'] = gameWeek['homeTeam.abbrev']
285
+ gameWeek['game_title'] = gameWeek['away_team_abbr'] + " @ " + gameWeek['home_team_abbr'] + " - " + gameWeek['date']
286
+ gameWeek['estStartTime'] = pd.to_datetime(gameWeek['startTimeUTC']).dt.tz_convert('US/Eastern').dt.strftime("%I:%M %p")
287
+
288
+ front_col = ['id','season','date','season_type','game_title','away_team_abbr','home_team_abbr','estStartTime']
289
+ gameWeek = gameWeek[front_col+[col for col in gameWeek.columns.to_list() if col not in front_col]]
290
+
291
+ game.append(gameWeek)
292
+
276
293
  #Concatenate all games
277
294
  df = pd.concat(game)
278
295
 
279
296
  #Return: specified schedule data
280
297
  return df
281
298
 
282
- def nhl_scrape_season(season,split_shifts = False, season_types = [2,3], remove = ['period-start','period-end','game-end','challenge','stoppage'], start = "09-01", end = "08-01", local=False, local_path = "schedule/schedule.csv", verbose = False, errors = False):
299
+ def nhl_scrape_season(season,split_shifts = False, season_types = [2,3], remove = ['period-start','period-end','game-end','challenge','stoppage'], start = "09-01", end = "08-01", local=False, local_path = "schedule/schedule.csv", verbose = False, sources = False, errors = False):
283
300
  #Given season, scrape all play-by-play occurring within the season
284
301
  # param 'season' - NHL season to scrape
285
302
  # param 'split_shifts' - boolean which splits pbp and shift events if true
@@ -289,12 +306,21 @@ def nhl_scrape_season(season,split_shifts = False, season_types = [2,3], remove
289
306
  # param 'local' - boolean indicating whether to use local file to scrape game_ids
290
307
  # param 'local_path' - path of local file
291
308
  # param 'verbose' - boolean which adds additional event info if true
309
+ # param 'sources' - boolean scraping the html and json sources to a master directory if true
292
310
  # param 'errors' - boolean returning game ids which did not scrape if true
293
311
 
294
312
  #Determine whether to use schedule data in repository or to scrape
295
- if local == True:
313
+ if local:
296
314
  load = pd.read_csv(local_path)
297
- load = load.loc[(load['season'].astype(str)==season)&(load['season_type'].isin(season_types))]
315
+ load['date'] = pd.to_datetime(load['date'])
316
+
317
+ start = f'{(season[0:4] if int(start[0:2])>=9 else season[4:8])}-{int(start[0:2])}-{int(start[3:5])}'
318
+ end = f'{(season[0:4] if int(end[0:2])>=9 else season[4:8])}-{int(end[0:2])}-{int(end[3:5])}'
319
+
320
+ load = load.loc[(load['season'].astype(str)==season)&
321
+ (load['season_type'].isin(season_types))&
322
+ (load['date']>=start)&(load['date']<=end)]
323
+
298
324
  game_ids = list(load['id'].astype(str))
299
325
  else:
300
326
  load = nhl_scrape_schedule(season,start,end)
@@ -310,31 +336,17 @@ def nhl_scrape_season(season,split_shifts = False, season_types = [2,3], remove
310
336
  start = time.perf_counter()
311
337
 
312
338
  #Perform scrape
313
- if split_shifts == True:
314
- data = nhl_scrape_game(game_ids,split_shifts=True,remove=remove,verbose=verbose,errors=errors)
339
+ if split_shifts:
340
+ data = nhl_scrape_game(game_ids,split_shifts=True,remove=remove,verbose=verbose,sources=sources,errors=errors)
315
341
  else:
316
- data = nhl_scrape_game(game_ids,remove=remove,verbose=verbose,errors=errors)
342
+ data = nhl_scrape_game(game_ids,remove=remove,verbose=verbose,sources=sources,errors=errors)
317
343
 
318
344
  end = time.perf_counter()
319
345
  secs = end - start
320
346
 
321
347
  print(f'Finished season scrape in {(secs/60)/60:.2f} hours.')
322
348
  #Return: Complete pbp and shifts data for specified season as well as dataframe of game_ids which failed to return data
323
- if split_shifts == True:
324
- pbp_dict = {'pbp':data['pbp'],
325
- 'shifts':data['shifts']}
326
-
327
- if errors:
328
- pbp_dict.update({'errors':data['errors']})
329
- return pbp_dict
330
- else:
331
- pbp = data
332
- if errors:
333
- pbp_dict = {'pbp':pbp,
334
- 'errors':data['errors']}
335
- return pbp_dict
336
- else:
337
- return pbp
349
+ return data
338
350
 
339
351
  def nhl_scrape_seasons_info(seasons = []):
340
352
  #Returns info related to NHL seasons (by default, all seasons are included)
@@ -449,17 +461,24 @@ def nhl_scrape_team_info(country = False):
449
461
 
450
462
  return data.sort_values(by=(['country3Code','countryCode','iocCode','countryName'] if country else ['fullName','triCode','id']))
451
463
 
452
- def nhl_scrape_player_data(player_id):
464
+ def nhl_scrape_player_data(player_ids):
453
465
  #Given player id, return player information
454
- api = f'https://api-web.nhle.com/v1/player/{player_id}/landing'
466
+ infos = []
467
+ for player_id in player_ids:
468
+ player_id = int(player_id)
469
+ api = f'https://api-web.nhle.com/v1/player/{player_id}/landing'
470
+
471
+ data = pd.json_normalize(rs.get(api).json())
455
472
 
456
- data = pd.json_normalize(rs.get(api).json())
473
+ #Add name column
474
+ data['fullName'] = (data['firstName.default'] + " " + data['lastName.default']).str.upper()
457
475
 
458
- #Add name column
459
- data['fullName'] = (data['firstName.default'] + " " + data['lastName.default']).str.upper()
476
+ #Append
477
+ infos.append(data)
460
478
 
479
+ df = pd.concat(infos)
461
480
  #Return: player data
462
- return data
481
+ return df
463
482
 
464
483
  def nhl_scrape_draft_rankings(arg = 'now', category = ''):
465
484
  #Given url argument for timeframe and prospect category, return draft rankings
@@ -478,12 +497,26 @@ def nhl_scrape_draft_rankings(arg = 'now', category = ''):
478
497
  #Return: prospect rankings
479
498
  return data
480
499
 
481
- def nhl_shooting_impacts(agg,team=False):
500
+ def nhl_apply_xG(pbp):
501
+ #Given play-by-play data, return this data with xG-related columns
502
+
503
+ #param 'pbp' - play-by-play data
504
+
505
+ print(f'Applying WSBA xG to model with seasons: {pbp['season'].drop_duplicates().to_list()}')
506
+ #Fix player data
507
+ #pbp = fix_players(pbp)
508
+
509
+ #Apply xG model
510
+ pbp = wsba_xG(pbp)
511
+
512
+ return pbp
513
+
514
+ def nhl_shooting_impacts(agg,type):
482
515
  #Given stats table generated from the nhl_calculate_stats function, return table with shot impacts
483
516
  #Only 5v5 is supported as of now
484
517
 
485
518
  #param 'agg' - stats table
486
- #param 'team' - boolean determining if team stats are calculated instead of skater stats
519
+ #param 'type' - type of stats to calculate ('skater', 'goalie', or 'team')
487
520
 
488
521
  #COMPOSITE IMPACT EVALUATIONS:
489
522
 
@@ -509,7 +542,7 @@ def nhl_shooting_impacts(agg,team=False):
509
542
 
510
543
  return rate+qual+fini
511
544
 
512
- if team:
545
+ if type == 'goalie':
513
546
  pos = agg
514
547
  for group in [('OOFF','F'),('ODEF','A')]:
515
548
  #Have to set these columns for compatibility with df.apply
@@ -536,35 +569,172 @@ def nhl_shooting_impacts(agg,team=False):
536
569
  pos[f'{group[0]}-SRI'] = pos['g'] - pos.apply(lambda x: goal_comp(avg_fen,x.xg_fen,x.xg,x.g,avg_fsh),axis=1)
537
570
  pos[f'{group[0]}-SQI'] = pos['g'] - pos.apply(lambda x: goal_comp(x.fenwick,avg_xg_fen,x.xg,x.g,avg_fsh),axis=1)
538
571
  pos[f'{group[0]}-FNI'] = pos['g'] - pos.apply(lambda x: goal_comp(x.fenwick,x.xg_fen,avg_xg,avg_g,avg_fsh),axis=1)
572
+
573
+ #Convert impacts to totals
574
+ #Calculate shot rate, shot quality, and finishing impacts
575
+ pos[f'{group[0]}-SRI-T'] = (pos[f'{group[0]}-SRI']/60)*pos['TOI']
576
+ pos[f'{group[0]}-SQI-T'] = (pos[f'{group[0]}-SQI']/60)*pos['TOI']
577
+ pos[f'{group[0]}-FNI-T'] = (pos[f'{group[0]}-FNI']/60)*pos['TOI']
539
578
 
579
+ #Rank per 60 stats
580
+ for stat in ['FF','FA','xGF','xGA','GF','GA','CF','CA','GSAx']:
581
+ pos[f'{stat}/60-P'] = pos[f'{stat}/60'].rank(pct=True)
582
+
583
+ #Flip percentiles for against stats
584
+ for stat in ['FA','xGA','GA','CA']:
585
+ pos[f'{stat}/60-P'] = 1-pos[f'{stat}/60-P']
586
+
540
587
  #Add extra metrics
541
588
  pos['RushF/60'] = (pos['RushF']/pos['TOI'])*60
542
589
  pos['RushA/60'] = (pos['RushA']/pos['TOI'])*60
543
- pos['Rushes FF'] = pos['RushF/60'].rank(pct=True)
544
- pos['Rushes FA'] = 1 - pos['RushA/60'].rank(pct=True)
590
+ pos['RushesFF'] = pos['RushF/60'].rank(pct=True)
591
+ pos['RushesFA'] = 1 - pos['RushA/60'].rank(pct=True)
545
592
  pos['RushFxG/60'] = (pos['RushFxG']/pos['TOI'])*60
546
593
  pos['RushAxG/60'] = (pos['RushAxG']/pos['TOI'])*60
547
- pos['Rushes xGF'] = pos['RushFxG/60'].rank(pct=True)
548
- pos['Rushes xGA'] = 1 - pos['RushAxG/60'].rank(pct=True)
594
+ pos['RushesxGF'] = pos['RushFxG/60'].rank(pct=True)
595
+ pos['RushesxGA'] = 1 - pos['RushAxG/60'].rank(pct=True)
549
596
  pos['RushFG/60'] = (pos['RushFG']/pos['TOI'])*60
550
597
  pos['RushAG/60'] = (pos['RushAG']/pos['TOI'])*60
551
- pos['Rushes GF'] = pos['RushFG/60'].rank(pct=True)
552
- pos['Rushes GA'] = 1 - pos['RushAG/60'].rank(pct=True)
598
+ pos['RushesGF'] = pos['RushFG/60'].rank(pct=True)
599
+ pos['RushesGA'] = 1 - pos['RushAG/60'].rank(pct=True)
553
600
 
554
601
  #Flip against metric percentiles
555
602
  pos['ODEF-SR'] = 1-pos['ODEF-SR']
556
603
  pos['ODEF-SQ'] = 1-pos['ODEF-SQ']
557
604
  pos['ODEF-FN'] = 1-pos['ODEF-FN']
558
605
 
606
+ #Extraneous Values
607
+ pos['EGF'] = pos['OOFF-SRI']+pos['OOFF-SQI']+pos['OOFF-FNI']
608
+ pos['ExGF'] = pos['OOFF-SRI']+pos['OOFF-SQI']
609
+ pos['EGA'] = pos['ODEF-SRI']+pos['ODEF-SQI']+pos['ODEF-FNI']
610
+ pos['ExGA'] = pos['ODEF-SRI']+pos['ODEF-SQI']
611
+
612
+ #...and their percentiles
613
+ pos['EGF-P'] = pos['EGF'].rank(pct=True)
614
+ pos['ExGF-P'] = pos['ExGF'].rank(pct=True)
615
+ pos['EGA-P'] = pos['EGA'].rank(pct=True)
616
+ pos['ExGA-P'] = pos['ExGA'].rank(pct=True)
617
+
618
+ pos['EGA-P'] = 1-pos['EGA']
619
+ pos['ExGA-P'] = 1-pos['ExGA']
620
+
621
+ #...and then their totals
622
+ pos['EGF-T'] = (pos['EGF']/60)*pos['TOI']
623
+ pos['ExGF-T'] = (pos['ExGF']/60)*pos['TOI']
624
+ pos['EGA-T'] = (pos['EGA']/60)*pos['TOI']
625
+ pos['ExGA-T'] = (pos['ExGA']/60)*pos['TOI']
626
+
627
+ #Goal Composites...
628
+ pos['Team-Adjusted-EGI'] = pos['ODEF-FNI']-pos['ExGA']
629
+ pos['GISAx'] = pos['ExGA']-pos['EGA']
630
+ pos['NetGI'] = pos['EGF'] - pos['EGA']
631
+ pos['NetxGI'] = pos['ExGF'] - pos['ExGA']
632
+
633
+ #...and their percentiles
634
+ pos['Team-Adjusted-EGI-P'] = pos['Team-Adjusted-EGI'].rank(pct=True)
635
+ pos['GISAx-P'] = pos['GISAx'].rank(pct=True)
636
+ pos['NetGI-P'] = pos['NetGI'].rank(pct=True)
637
+ pos['NetxGI-P'] = pos['NetxGI'].rank(pct=True)
638
+
639
+ #...and then their totals
640
+ pos['Team-Adjusted-EGI-T'] = (pos['Team-Adjusted-EGI']/60)*pos['TOI']
641
+ pos['GISAx-T'] = (pos['GISAx']/60)*pos['TOI']
642
+ pos['NetGI-T'] = (pos['NetGI']/60)*pos['TOI']
643
+ pos['NetxGI-T'] = (pos['NetxGI']/60)*pos['TOI']
644
+
559
645
  #Return: goalie stats with shooting impacts
560
- return pos.drop(columns=['fsh','fenwick','xg_fen','xg','g','finishing']).sort_values(['Season','Team'])
646
+ return pos.drop(columns=['fsh','fenwick','xg_fen','xg','g','finishing']).sort_values(['Goalie','Season','Team'])
647
+
648
+ elif type =='team':
649
+ pos = agg
650
+ for group in [('OOFF','F'),('ODEF','A')]:
651
+ #Have to set these columns for compatibility with df.apply
652
+ pos['fsh'] = pos[f'Fsh{group[1]}%']
653
+ pos['fenwick'] = pos[f'F{group[1]}/60']
654
+ pos['xg'] = pos[f'xG{group[1]}/60']
655
+ pos['g'] = pos[f'G{group[1]}/60']
656
+ pos['xg_fen'] = pos[f'xG{group[1]}/F{group[1]}']
657
+ pos['finishing'] = pos[f'G{group[1]}/xG{group[1]}']
658
+
659
+ #Find average for position in frame
660
+ avg_fen = pos['fenwick'].mean()
661
+ avg_xg = pos['xg'].mean()
662
+ avg_g = pos['g'].mean()
663
+ avg_fsh = avg_g/avg_fen
664
+ avg_xg_fen = avg_xg/avg_fen
561
665
 
666
+ #Calculate composite percentiles
667
+ pos[f'{group[0]}-SR'] = pos['fenwick'].rank(pct=True)
668
+ pos[f'{group[0]}-SQ'] = pos['xg_fen'].rank(pct=True)
669
+ pos[f'{group[0]}-FN'] = pos['finishing'].rank(pct=True)
670
+
671
+ #Calculate shot rate, shot quality, and finishing impacts
672
+ pos[f'{group[0]}-SRI'] = pos['g'] - pos.apply(lambda x: goal_comp(avg_fen,x.xg_fen,x.xg,x.g,avg_fsh),axis=1)
673
+ pos[f'{group[0]}-SQI'] = pos['g'] - pos.apply(lambda x: goal_comp(x.fenwick,avg_xg_fen,x.xg,x.g,avg_fsh),axis=1)
674
+ pos[f'{group[0]}-FNI'] = pos['g'] - pos.apply(lambda x: goal_comp(x.fenwick,x.xg_fen,avg_xg,avg_g,avg_fsh),axis=1)
675
+
676
+ #Convert impacts to totals
677
+ #Calculate shot rate, shot quality, and finishing impacts
678
+ pos[f'{group[0]}-SRI-T'] = (pos[f'{group[0]}-SRI']/60)*pos['TOI']
679
+ pos[f'{group[0]}-SQI-T'] = (pos[f'{group[0]}-SQI']/60)*pos['TOI']
680
+ pos[f'{group[0]}-FNI-T'] = (pos[f'{group[0]}-FNI']/60)*pos['TOI']
681
+
682
+ #Rank per 60 stats
683
+ for stat in per_sixty[10:len(per_sixty)]:
684
+ pos[f'{stat}/60-P'] = pos[f'{stat}/60'].rank(pct=True)
685
+
686
+ #Flip percentiles for against stats
687
+ for stat in ['FA','xGA','GA','CA','HA','Give','Penl','Penl2','Penl5']:
688
+ pos[f'{stat}/60-P'] = 1-pos[f'{stat}/60-P']
689
+
690
+ #Add extra metrics
691
+ pos['RushF/60'] = (pos['RushF']/pos['TOI'])*60
692
+ pos['RushA/60'] = (pos['RushA']/pos['TOI'])*60
693
+ pos['RushesFF'] = pos['RushF/60'].rank(pct=True)
694
+ pos['RushesFA'] = 1 - pos['RushA/60'].rank(pct=True)
695
+ pos['RushFxG/60'] = (pos['RushFxG']/pos['TOI'])*60
696
+ pos['RushAxG/60'] = (pos['RushAxG']/pos['TOI'])*60
697
+ pos['RushesxGF'] = pos['RushFxG/60'].rank(pct=True)
698
+ pos['RushesxGA'] = 1 - pos['RushAxG/60'].rank(pct=True)
699
+ pos['RushFG/60'] = (pos['RushFG']/pos['TOI'])*60
700
+ pos['RushAG/60'] = (pos['RushAG']/pos['TOI'])*60
701
+ pos['RushesGF'] = pos['RushFG/60'].rank(pct=True)
702
+ pos['RushesGA'] = 1 - pos['RushAG/60'].rank(pct=True)
703
+
704
+ #Flip against metric percentiles
705
+ pos['ODEF-SR'] = 1-pos['ODEF-SR']
706
+ pos['ODEF-SQ'] = 1-pos['ODEF-SQ']
707
+ pos['ODEF-FN'] = 1-pos['ODEF-FN']
708
+
709
+ pos['EGF'] = pos['OOFF-SRI']+pos['OOFF-SQI']+pos['OOFF-FNI']
710
+ pos['ExGF'] = pos['OOFF-SRI']+pos['OOFF-SQI']
711
+ pos['EGA'] = pos['ODEF-SRI']+pos['ODEF-SQI']+pos['ODEF-FNI']
712
+ pos['ExGA'] = pos['ODEF-SRI']+pos['ODEF-SQI']
713
+
714
+ #...and their percentiles
715
+ pos['EGF-P'] = pos['EGF'].rank(pct=True)
716
+ pos['ExGF-P'] = pos['ExGF'].rank(pct=True)
717
+ pos['EGA-P'] = pos['EGA'].rank(pct=True)
718
+ pos['ExGA-P'] = pos['ExGA'].rank(pct=True)
719
+
720
+ pos['EGA-P'] = 1-pos['EGA']
721
+ pos['ExGA-P'] = 1-pos['ExGA']
722
+
723
+ #...and then their totals
724
+ pos['EGF-T'] = (pos['EGF']/60)*pos['TOI']
725
+ pos['ExGF-T'] = (pos['ExGF']/60)*pos['TOI']
726
+ pos['EGA-T'] = (pos['EGA']/60)*pos['TOI']
727
+ pos['ExGA-T'] = (pos['ExGA']/60)*pos['TOI']
728
+
729
+ #Return: team stats with shooting impacts
730
+ return pos.drop(columns=['fsh','fenwick','xg_fen','xg','g','finishing']).sort_values(['Season','Team'])
562
731
 
563
732
  else:
564
733
  #Remove skaters with less than 150 minutes of TOI then split between forwards and dmen
565
- agg = agg.loc[agg['TOI']>=150]
566
- forwards = agg.loc[agg['Position']!='D']
567
- defensemen = agg.loc[agg['Position']=='D']
734
+ #These are added back in after the fact
735
+ forwards = agg.loc[(agg['Position']!='D')&(agg['TOI']>=150)]
736
+ defensemen = agg.loc[(agg['Position']=='D')&(agg['TOI']>=150)]
737
+ non_players = agg.loc[agg['TOI']<150]
568
738
 
569
739
  #Loop through both positions, all groupings (INDV, OOFF, and ODEF) generating impacts
570
740
  for pos in [forwards,defensemen]:
@@ -594,15 +764,29 @@ def nhl_shooting_impacts(agg,team=False):
594
764
  pos[f'{group[0]}-SQI'] = pos['g'] - pos.apply(lambda x: goal_comp(x.fenwick,avg_xg_fen,x.xg,x.g,avg_fsh),axis=1)
595
765
  pos[f'{group[0]}-FNI'] = pos['g'] - pos.apply(lambda x: goal_comp(x.fenwick,x.xg_fen,avg_xg,avg_g,avg_fsh),axis=1)
596
766
 
767
+ #Convert impacts to totals
768
+ #Calculate shot rate, shot quality, and finishing impacts
769
+ pos[f'{group[0]}-SRI-T'] = (pos[f'{group[0]}-SRI']/60)*pos['TOI']
770
+ pos[f'{group[0]}-SQI-T'] = (pos[f'{group[0]}-SQI']/60)*pos['TOI']
771
+ pos[f'{group[0]}-FNI-T'] = (pos[f'{group[0]}-FNI']/60)*pos['TOI']
772
+
597
773
  #Calculate On-Ice Involvement Percentiles
598
- pos['Fenwick'] = pos['FC%'].rank(pct=True)
599
- pos['xG'] = pos['xGC%'].rank(pct=True)
600
- pos['Goal Factor'] = pos['GI%'].rank(pct=True)
601
- pos['Goal Scoring'] = pos['GC%'].rank(pct=True)
602
- pos['Rush/60'] = (pos['Rush']/pos['TOI'])*60
603
- pos['RushxG/60'] = (pos['Rush xG']/pos['TOI'])*60
604
- pos['Rushes xG'] = pos['RushxG/60'].rank(pct=True)
605
- pos['Rushes FF'] = pos['Rush/60'].rank(pct=True)
774
+ pos['Fi/F'] = pos['FC%'].rank(pct=True)
775
+ pos['xGi/F'] = pos['xGC%'].rank(pct=True)
776
+ pos['Pi/F'] = pos['GI%'].rank(pct=True)
777
+ pos['Gi/F'] = pos['GC%'].rank(pct=True)
778
+ pos['RushFi/60'] = (pos['Rush']/pos['TOI'])*60
779
+ pos['RushxGi/60'] = (pos['Rush xG']/pos['TOI'])*60
780
+ pos['RushesxGi'] = pos['RushxGi/60'].rank(pct=True)
781
+ pos['RushesFi'] = pos['RushFi/60'].rank(pct=True)
782
+
783
+ #Rank per 60 stats
784
+ for stat in per_sixty:
785
+ pos[f'{stat}/60-P'] = pos[f'{stat}/60'].rank(pct=True)
786
+
787
+ #Flip percentiles for against stats
788
+ for stat in ['FA','xGA','GA','CA','HA','Give','Penl','Penl2','Penl5']:
789
+ pos[f'{stat}/60-P'] = 1-pos[f'{stat}/60-P']
606
790
 
607
791
  #Add positions back together
608
792
  complete = pd.concat([forwards,defensemen])
@@ -613,108 +797,253 @@ def nhl_shooting_impacts(agg,team=False):
613
797
  complete['ODEF-FN'] = 1-complete['ODEF-FN']
614
798
 
615
799
  #Extraneous Values
616
- complete['Extraneous Gi'] = complete['INDV-SRI']+complete['INDV-SQI']+complete['INDV-FNI']
617
- complete['Extraneous xGi'] = complete['INDV-SRI']+complete['INDV-SQI']
618
- complete['Extraneous GF'] = complete['OOFF-SRI']+complete['OOFF-SQI']+complete['OOFF-FNI']
619
- complete['Extraneous xGF'] = complete['OOFF-SRI']+complete['OOFF-SQI']
620
- complete['Extraneous GA'] = complete['ODEF-SRI']+complete['ODEF-SQI']+complete['ODEF-FNI']
621
- complete['Extraneous xGA'] = complete['ODEF-SRI']+complete['ODEF-SQI']
622
-
623
- #Goal Composites
624
- complete['Linemate Extraneous Goals'] = complete['Extraneous GF'] - complete['Extraneous Gi']
625
- complete['Linemate Goal Induction'] = complete['Linemate Extraneous Goals']*complete['AC%']
626
- complete['Composite Goal Impact'] = complete['Extraneous Gi'] + complete['Linemate Goal Induction']
627
- complete['Linemate Rel. Goal Impact'] = complete['Composite Goal Impact'] - (complete['Extraneous GF']-complete['Composite Goal Impact'])
628
- complete['Net Goal Impact'] = complete['Extraneous GF'] - complete['Extraneous GA']
629
- complete['Net xGoal Impact'] = complete['Extraneous xGF'] - complete['Extraneous xGA']
630
-
800
+ complete['EGi'] = complete['INDV-SRI']+complete['INDV-SQI']+complete['INDV-FNI']
801
+ complete['ExGi'] = complete['INDV-SRI']+complete['INDV-SQI']
802
+ complete['EGF'] = complete['OOFF-SRI']+complete['OOFF-SQI']+complete['OOFF-FNI']
803
+ complete['ExGF'] = complete['OOFF-SRI']+complete['OOFF-SQI']
804
+ complete['EGA'] = complete['ODEF-SRI']+complete['ODEF-SQI']+complete['ODEF-FNI']
805
+ complete['ExGA'] = complete['ODEF-SRI']+complete['ODEF-SQI']
806
+
807
+ #...and their percentiles
808
+ complete['EGi-P'] = complete['EGi'].rank(pct=True)
809
+ complete['ExGi-P'] = complete['ExGi'].rank(pct=True)
810
+ complete['EGF-P'] = complete['EGF'].rank(pct=True)
811
+ complete['ExGF-P'] = complete['ExGF'].rank(pct=True)
812
+ complete['EGA-P'] = complete['EGA'].rank(pct=True)
813
+ complete['ExGA-P'] = complete['ExGA'].rank(pct=True)
814
+
815
+ complete['EGA-P'] = 1-complete['EGA']
816
+ complete['ExGA-P'] = 1-complete['ExGA']
817
+
818
+ #...and then their totals
819
+ complete['EGi-T'] = (complete['EGi']/60)*complete['TOI']
820
+ complete['ExGi-T'] = (complete['ExGi']/60)*complete['TOI']
821
+ complete['EGF-T'] = (complete['EGF']/60)*complete['TOI']
822
+ complete['ExGF-T'] = (complete['ExGF']/60)*complete['TOI']
823
+ complete['EGA-T'] = (complete['EGA']/60)*complete['TOI']
824
+ complete['ExGA-T'] = (complete['ExGA']/60)*complete['TOI']
825
+
826
+ #Goal Composites...
827
+ complete['LiEG'] = complete['EGF'] - complete['EGi']
828
+ complete['LiExG'] = complete['ExGF'] - complete['ExGi']
829
+ complete['LiGIn'] = complete['LiEG']*complete['AC%']
830
+ complete['LixGIn'] = complete['LiExG']*complete['AC%']
831
+ complete['ALiGIn'] = complete['LiGIn']-complete['LixGIn']
832
+ complete['CompGI'] = complete['EGi'] + complete['LiGIn']
833
+ complete['LiRelGI'] = complete['CompGI'] - (complete['EGF']-complete['CompGI'])
834
+ complete['NetGI'] = complete['EGF'] - complete['EGA']
835
+ complete['NetxGI'] = complete['ExGF'] - complete['ExGA']
836
+
837
+ #...and their percentiles
838
+ complete['LiEG-P'] = complete['LiEG'].rank(pct=True)
839
+ complete['LiExG-P'] = complete['LiExG'].rank(pct=True)
840
+ complete['LiGIn-P'] = complete['LiGIn'].rank(pct=True)
841
+ complete['LixGIn-P'] = complete['LixGIn'].rank(pct=True)
842
+ complete['ALiGIn-P'] = complete['ALiGIn'].rank(pct=True)
843
+ complete['CompGI-P'] = complete['CompGI'].rank(pct=True)
844
+ complete['LiRelGI-P'] = complete['LiRelGI'].rank(pct=True)
845
+ complete['NetGI-P'] = complete['NetGI'].rank(pct=True)
846
+ complete['NetxGI-P'] = complete['NetxGI'].rank(pct=True)
847
+
848
+ #..and then their totals
849
+ complete['LiEG-T'] = (complete['LiEG']/60)*complete['TOI']
850
+ complete['LiExG-T'] = (complete['LiExG']/60)*complete['TOI']
851
+ complete['LiGIn-T'] = (complete['LiGIn']/60)*complete['TOI']
852
+ complete['LixGIn-T'] = (complete['LixGIn']/60)*complete['TOI']
853
+ complete['ALiGIn-T'] = (complete['ALiGIn']/60)*complete['TOI']
854
+ complete['CompGI-T'] = (complete['CompGI']/60)*complete['TOI']
855
+ complete['LiRelGI-T'] = (complete['LiRelGI']/60)*complete['TOI']
856
+ complete['NetGI-T'] = (complete['NetGI']/60)*complete['TOI']
857
+ complete['NetxGI-T'] = (complete['NetxGI']/60)*complete['TOI']
858
+
859
+ #Add back skaters with less than 150 minutes TOI
860
+ df = pd.concat([complete,non_players]).drop(columns=['fsh','fenwick','xg_fen','xg','g','finishing']).sort_values(['Player','Season','Team','ID'])
631
861
  #Return: skater stats with shooting impacts
632
- return complete.drop(columns=['fsh','fenwick','xg_fen','xg','g','finishing']).sort_values(['Player','Season','Team','ID'])
862
+ return df
633
863
 
634
- def nhl_calculate_stats(pbp,type,season_types,game_strength,roster_path="rosters/nhl_rosters.csv",xg="moneypuck",shot_impact=False):
864
+ def nhl_calculate_stats(pbp,type,season_types,game_strength,split_game=False,roster_path="rosters/nhl_rosters.csv",shot_impact=False):
635
865
  #Given play-by-play, seasonal information, game_strength, rosters, and xG model, return aggregated stats
636
866
  # param 'pbp' - play-by-play dataframe
637
- # param 'type' - type of stats to calculate ('skater', 'goaltender', or 'team')
867
+ # param 'type' - type of stats to calculate ('skater', 'goalie', or 'team')
638
868
  # param 'season' - season or timeframe of events in play-by-play
639
869
  # param 'season_types' - list of season types (preseason, regular season, or playoffs) to include in aggregation
640
870
  # param 'game_strength' - list of game_strengths to include in aggregation
871
+ # param 'split_game' - boolean which if true groups aggregation by game
641
872
  # param 'roster_path' - path to roster file
642
- # param 'xg' - xG model to apply to pbp for aggregation
643
873
  # param 'shot_impact' - boolean determining if the shot impact model will be applied to the dataset
644
874
 
645
- print(f"Calculating statistics for all games in the provided play-by-play data for {type}s...\nSeasons included: {pbp['season'].drop_duplicates().to_list()}...")
875
+ print(f"Calculating statistics for all games in the provided play-by-play data at {game_strength} for {type}s...\nSeasons included: {pbp['season'].drop_duplicates().to_list()}...")
646
876
  start = time.perf_counter()
647
877
 
648
- #Add extra data and apply team changes
649
- pbp = prep_xG_data(pbp).replace(convert_team_abbr)
650
-
651
878
  #Check if xG column exists and apply model if it does not
652
879
  try:
653
880
  pbp['xG']
654
- except KeyError:
655
- if xg == 'wsba':
656
- pbp = wsba_xG(pbp)
657
- else:
658
- pbp = moneypuck_xG(pbp)
881
+ except KeyError:
882
+ pbp = wsba_xG(pbp)
659
883
 
660
- #Filter by season types and remove shootouts
661
- pbp = pbp.loc[(pbp['season_type'].isin(season_types)) & (pbp['period'] < 5)]
884
+ #Filter by season types; among fenwick events, remove shots on empty nets and (unless game_strength is 'all') shots with no coordinates
885
+ pbp_noshot = pbp.loc[(pbp['season_type'].isin(season_types)) & ~(pbp['event_type'].isin(fenwick_events))]
886
+
887
+ #Include everything when strengths is set to 'all'
888
+ if game_strength == 'all':
889
+ mask = ((pbp['event_type'].isin(fenwick_events)) & (pbp['empty_net']<1))
890
+ else:
891
+ mask = ((pbp['event_type'].isin(fenwick_events)) & (pbp['empty_net']<1) & (pbp['x'].notna()) & (pbp['y'].notna()))
662
892
 
893
+ pbp_shot = pbp.loc[(pbp['season_type'].isin(season_types)) & mask]
894
+
895
+ pbp = pd.concat([pbp_shot,pbp_noshot])
896
+
663
897
  #Convert all columns with player ids to float in order to avoid merging errors
664
898
  for col in get_col():
665
899
  if "_id" in col:
666
900
  try: pbp[col] = pbp[col].astype(float)
667
901
  except KeyError: continue
668
902
 
669
- # Filter by game strength if not "all"
670
- if game_strength != "all":
671
- pbp = pbp.loc[pbp['strength_state'].isin(game_strength)]
903
+ #Split by game if specified
904
+ if split_game:
905
+ second_group = ['season','game_id']
906
+ else:
907
+ second_group = ['season']
672
908
 
673
909
  #Split calculation
674
- if type == 'team':
675
- complete = calc_team(pbp)
676
-
677
- #WSBA
678
- complete['WSBA'] = complete['Team']+complete['Season'].astype(str)
910
+ if type == 'goalie':
911
+ complete = calc_goalie(pbp,game_strength,second_group)
679
912
 
680
913
  #Set TOI to minute
681
914
  complete['TOI'] = complete['TOI']/60
682
915
 
683
916
  #Add per 60 stats
684
- for stat in per_sixty[7:13]:
917
+ for stat in ['FF','FA','xGF','xGA','GF','GA','CF','CA','GSAx']:
685
918
  complete[f'{stat}/60'] = (complete[stat]/complete['TOI'])*60
919
+
920
+ complete['GF%'] = complete['GF']/(complete['GF']+complete['GA'])
921
+ complete['xGF%'] = complete['xGF']/(complete['xGF']+complete['xGA'])
922
+ complete['FF%'] = complete['FF']/(complete['FF']+complete['FA'])
923
+ complete['CF%'] = complete['CF']/(complete['CF']+complete['CA'])
924
+
925
+ #Remove entries with no ID listed
926
+ complete = complete.loc[complete['ID'].notna()]
686
927
 
687
- #Rank per 60 stats
688
- for stat in per_sixty[7:13]:
689
- complete[f'{stat}/60 Percentile'] = complete[f'{stat}/60'].rank(pct=True)
928
+ #Import rosters and player info
929
+ rosters = pd.read_csv(roster_path)
930
+ names = rosters[['id','fullName',
931
+ 'headshot','positionCode','shootsCatches',
932
+ 'heightInInches','weightInPounds',
933
+ 'birthDate','birthCountry']].drop_duplicates(subset=['id','fullName'],keep='last')
690
934
 
691
- #Flip percentiles for against stats
692
- for stat in ['FA','xGA','GA']:
693
- complete[f'{stat}/60 Percentile'] = 1-complete[f'{stat}/60 Percentile']
935
+ #Add names
936
+ complete = pd.merge(complete,names,how='left',left_on='ID',right_on='id')
937
+
938
+ #Rename roster columns to their display names
939
+ complete = complete.rename(columns={'fullName':'Goalie',
940
+ 'headshot':'Headshot',
941
+ 'positionCode':'Position',
942
+ 'shootsCatches':'Handedness',
943
+ 'heightInInches':'Height (in)',
944
+ 'weightInPounds':'Weight (lbs)',
945
+ 'birthDate':'Birthday',
946
+ 'birthCountry':'Nationality'})
947
+
948
+ #WSBA
949
+ complete['WSBA'] = complete['Goalie']+complete['Team']+complete['Season'].astype(str)
950
+
951
+ #Add player age
952
+ complete['Birthday'] = pd.to_datetime(complete['Birthday'])
953
+ complete['season_year'] = complete['Season'].astype(str).str[4:8].astype(int)
954
+ complete['Age'] = complete['season_year'] - complete['Birthday'].dt.year
955
+
956
+ #Find player headshot
957
+ complete['Headshot'] = 'https://assets.nhle.com/mugs/nhl/'+complete['Season'].astype(str)+'/'+complete['Team']+'/'+complete['ID'].astype(int).astype(str)+'.png'
694
958
 
695
959
  end = time.perf_counter()
696
960
  length = end-start
697
961
  print(f'...finished in {(length if length <60 else length/60):.2f} {'seconds' if length <60 else 'minutes'}.')
698
- #Apply shot impacts if necessary (Note: this will remove skaters with fewer than 150 minutes of TOI due to the shot impact TOI rule)
962
+
963
+ head = ['Goalie','ID','Game'] if 'Game' in complete.columns else ['Goalie','ID']
964
+ complete = complete[head+[
965
+ "Season","Team",'WSBA',
966
+ 'Headshot','Position','Handedness',
967
+ 'Height (in)','Weight (lbs)',
968
+ 'Birthday','Age','Nationality',
969
+ 'GP','TOI',
970
+ "GF","FF","xGF","xGF/FF","GF/xGF","FshF%",
971
+ "GA","FA","xGA","xGA/FA","GA/xGA","FshA%",
972
+ 'CF','CA',
973
+ 'GSAx',
974
+ 'RushF','RushA','RushFxG','RushAxG','RushFG','RushAG'
975
+ ]+[f'{stat}/60' for stat in ['FF','FA','xGF','xGA','GF','GA','CF','CA','GSAx']]]
976
+
977
+ #Apply shot impacts if necessary
699
978
  if shot_impact:
700
- return nhl_shooting_impacts(complete,True)
701
- else:
702
- return complete
979
+ complete = nhl_shooting_impacts(complete,'goalie')
980
+
981
+ end = time.perf_counter()
982
+ length = end-start
983
+ print(f'...finished in {(length if length <60 else length/60):.2f} {'seconds' if length <60 else 'minutes'}.')
984
+
985
+ return complete
986
+
987
+ elif type == 'team':
988
+ complete = calc_team(pbp,game_strength,second_group)
989
+
990
+ #WSBA
991
+ complete['WSBA'] = complete['Team']+complete['Season'].astype(str)
992
+
993
+ #Set TOI to minute
994
+ complete['TOI'] = complete['TOI']/60
995
+
996
+ #Add per 60 stats
997
+ for stat in per_sixty[10:len(per_sixty)]:
998
+ complete[f'{stat}/60'] = (complete[stat]/complete['TOI'])*60
999
+
1000
+ complete['GF%'] = complete['GF']/(complete['GF']+complete['GA'])
1001
+ complete['xGF%'] = complete['xGF']/(complete['xGF']+complete['xGA'])
1002
+ complete['FF%'] = complete['FF']/(complete['FF']+complete['FA'])
1003
+ complete['CF%'] = complete['CF']/(complete['CF']+complete['CA'])
1004
+
1005
+ head = ['Team','Game'] if 'Game' in complete.columns else ['Team']
1006
+ complete = complete[head+[
1007
+ 'Season','WSBA',
1008
+ 'GP','TOI',
1009
+ "GF","FF","xGF","xGF/FF","GF/xGF","FshF%",
1010
+ "GA","FA","xGA","xGA/FA","GA/xGA","FshA%",
1011
+ 'CF','CA',
1012
+ 'GF%','FF%','xGF%','CF%',
1013
+ 'HF','HA','HF%',
1014
+ 'Penl','Penl2','Penl5','PIM','Draw','PENL%',
1015
+ 'Give','Take','PM%',
1016
+ 'Block',
1017
+ 'RushF','RushA','RushFxG','RushAxG','RushFG','RushAG'
1018
+ ]+[f'{stat}/60' for stat in per_sixty[10:len(per_sixty)]]]
1019
+ #Apply shot impacts if necessary
1020
+ if shot_impact:
1021
+ complete = nhl_shooting_impacts(complete,'team')
1022
+
1023
+ end = time.perf_counter()
1024
+ length = end-start
1025
+ print(f'...finished in {(length if length <60 else length/60):.2f} {'seconds' if length <60 else 'minutes'}.')
1026
+
1027
+ return complete
703
1028
  else:
704
- indv_stats = calc_indv(pbp)
705
- onice_stats = calc_onice(pbp)
1029
+ indv_stats = calc_indv(pbp,game_strength,second_group)
1030
+ onice_stats = calc_onice(pbp,game_strength,second_group)
706
1031
 
707
1032
  #IDs sometimes set as objects
708
1033
  indv_stats['ID'] = indv_stats['ID'].astype(float)
709
1034
  onice_stats['ID'] = onice_stats['ID'].astype(float)
710
1035
 
711
1036
  #Merge and add columns for extra stats
712
- complete = pd.merge(indv_stats,onice_stats,how="outer",on=['ID','Team','Season'])
1037
+ complete = pd.merge(indv_stats,onice_stats,how="outer",on=['ID','Team','Season']+(['Game'] if 'game_id' in second_group else []))
713
1038
  complete['GC%'] = complete['Gi']/complete['GF']
714
1039
  complete['AC%'] = (complete['A1']+complete['A2'])/complete['GF']
715
1040
  complete['GI%'] = (complete['Gi']+complete['A1']+complete['A2'])/complete['GF']
716
1041
  complete['FC%'] = complete['Fi']/complete['FF']
717
1042
  complete['xGC%'] = complete['xGi']/complete['xGF']
1043
+ complete['GF%'] = complete['GF']/(complete['GF']+complete['GA'])
1044
+ complete['xGF%'] = complete['xGF']/(complete['xGF']+complete['xGA'])
1045
+ complete['FF%'] = complete['FF']/(complete['FF']+complete['FA'])
1046
+ complete['CF%'] = complete['CF']/(complete['CF']+complete['CA'])
718
1047
 
719
1048
  #Remove entries with no ID listed
720
1049
  complete = complete.loc[complete['ID'].notna()]
@@ -742,18 +1071,6 @@ def nhl_calculate_stats(pbp,type,season_types,game_strength,roster_path="rosters
742
1071
  #Set TOI to minute
743
1072
  complete['TOI'] = complete['TOI']/60
744
1073
 
745
- #Add per 60 stats
746
- for stat in per_sixty:
747
- complete[f'{stat}/60'] = (complete[stat]/complete['TOI'])*60
748
-
749
- #Rank per 60 stats
750
- for stat in per_sixty:
751
- complete[f'{stat}/60 Percentile'] = complete[f'{stat}/60'].rank(pct=True)
752
-
753
- #Flip percentiles for against stats
754
- for stat in ['FA','xGA','GA']:
755
- complete[f'{stat}/60 Percentile'] = 1-complete[f'{stat}/60 Percentile']
756
-
757
1074
  #Add player age
758
1075
  complete['Birthday'] = pd.to_datetime(complete['Birthday'])
759
1076
  complete['season_year'] = complete['Season'].astype(str).str[4:8].astype(int)
@@ -762,42 +1079,56 @@ def nhl_calculate_stats(pbp,type,season_types,game_strength,roster_path="rosters
762
1079
  #Find player headshot
763
1080
  complete['Headshot'] = 'https://assets.nhle.com/mugs/nhl/'+complete['Season'].astype(str)+'/'+complete['Team']+'/'+complete['ID'].astype(int).astype(str)+'.png'
764
1081
 
765
- end = time.perf_counter()
766
- length = end-start
767
1082
  #Remove goalies that occasionally appear in a set
768
1083
  complete = complete.loc[complete['Position']!='G']
769
1084
  #Add WSBA ID
770
1085
  complete['WSBA'] = complete['Player']+complete['Season'].astype(str)+complete['Team']
771
1086
 
1087
+ #Add per 60 stats
1088
+ for stat in per_sixty:
1089
+ complete[f'{stat}/60'] = (complete[stat]/complete['TOI'])*60
1090
+
772
1091
  #Shot Type Metrics
773
1092
  type_metrics = []
774
1093
  for type in shot_types:
775
1094
  for stat in per_sixty[:3]:
776
1095
  type_metrics.append(f'{type.capitalize()}{stat}')
777
1096
 
778
- complete = complete[[
779
- 'Player','ID',
1097
+ head = ['Player','ID','Game'] if 'Game' in complete.columns else ['Player','ID']
1098
+ complete = complete[head+[
780
1099
  "Season","Team",'WSBA',
781
1100
  'Headshot','Position','Handedness',
782
1101
  'Height (in)','Weight (lbs)',
783
1102
  'Birthday','Age','Nationality',
784
1103
  'GP','TOI',
785
1104
  "Gi","A1","A2",'P1','P',
1105
+ 'Give','Take','PM%','HF','HA','HF%',
786
1106
  "Fi","xGi",'xGi/Fi',"Gi/xGi","Fshi%",
787
1107
  "GF","FF","xGF","xGF/FF","GF/xGF","FshF%",
788
1108
  "GA","FA","xGA","xGA/FA","GA/xGA","FshA%",
1109
+ 'Ci','CF','CA','CF%',
1110
+ 'FF%','xGF%','GF%',
789
1111
  'Rush',"Rush xG",'Rush G',"GC%","AC%","GI%","FC%","xGC%",
790
- ]+[f'{stat}/60' for stat in per_sixty]+[f'{stat}/60 Percentile' for stat in per_sixty]+type_metrics].fillna(0).sort_values(['Player','Season','Team','ID'])
1112
+ 'F','FW','FL','F%',
1113
+ 'Penl','Penl2','Penl5',
1114
+ 'Draw','PIM','PENL%',
1115
+ 'Block',
1116
+ 'OZF','NZF','DZF',
1117
+ 'OZF%','NZF%','DZF%',
1118
+ ]+[f'{stat}/60' for stat in per_sixty]+type_metrics].fillna(0).sort_values(['Player','Season','Team','ID'])
791
1119
 
792
- print(f'...finished in {(length if length <60 else length/60):.2f} {'seconds' if length <60 else 'minutes'}.')
793
1120
  #Apply shot impacts if necessary (Note: skaters with fewer than 150 minutes of TOI are left out of the impact calculations and added back to the table afterwards)
794
1121
  if shot_impact:
795
- return nhl_shooting_impacts(complete,False)
796
- else:
797
- return complete
1122
+ complete = nhl_shooting_impacts(complete,'skater')
1123
+
1124
+ end = time.perf_counter()
1125
+ length = end-start
1126
+ print(f'...finished in {(length if length <60 else length/60):.2f} {'seconds' if length <60 else 'minutes'}.')
798
1127
 
799
- def nhl_plot_skaters_shots(pbp,skater_dict,strengths,marker_dict=event_markers,onice = 'indv',title = True,legend=False,xg='moneypuck'):
800
- #Returns list of plots for specified skaters
1128
+ return complete
1129
+
1130
+ def nhl_plot_skaters_shots(pbp,skater_dict,strengths,marker_dict=event_markers,onice = 'indv',title = True,legend=False):
1131
+ #Returns dict of plots for specified skaters
801
1132
  # param 'pbp' - pbp to plot data
802
1133
  # param 'skater_dict' - skaters to plot shots for (format: {'Patrice Bergeron':['20242025','BOS']})
803
1134
  # param 'strengths' - strengths to include in plotting
@@ -809,18 +1140,19 @@ def nhl_plot_skaters_shots(pbp,skater_dict,strengths,marker_dict=event_markers,o
809
1140
 
810
1141
  print(f'Plotting the following skater shots: {skater_dict}...')
811
1142
 
812
- #Iterate through games, adding plot to list
813
- skater_plots = []
1143
+ #Iterate through skaters, adding plots to dict
1144
+ skater_plots = {}
814
1145
  for skater in skater_dict.keys():
815
1146
  skater_info = skater_dict[skater]
816
1147
  title = f'{skater} Fenwick Shots for {skater_info[1]} in {skater_info[0][2:4]}-{skater_info[0][6:8]}' if title else ''
817
- skater_plots.append(plot_skater_shots(pbp,skater,skater_info[0],skater_info[1],strengths,title,marker_dict,onice,legend,xg))
1148
+ #Key is formatted as PLAYERSEASONTEAM (i.e. PATRICE BERGERON20212022BOS)
1149
+ skater_plots.update({f'{skater}{skater_info[0]}{skater_info[1]}':[plot_skater_shots(pbp,skater,skater_info[0],skater_info[1],strengths,title,marker_dict,onice,legend)]})
818
1150
 
819
1151
  #Return: dict of plotted skater shot charts
820
1152
  return skater_plots
821
1153
 
822
- def nhl_plot_games(pbp,events,strengths,game_ids='all',marker_dict=event_markers,team_colors={'away':'primary','home':'primary'},legend=False,xg='moneypuck'):
823
- #Returns list of plots for specified games
1154
+ def nhl_plot_games(pbp,events,strengths,game_ids='all',marker_dict=event_markers,team_colors={'away':'primary','home':'primary'},legend=False):
1155
+ #Returns dict of plots for specified games
824
1156
  # param 'pbp' - pbp to plot data
825
1157
  # param 'events' - type of events to plot
826
1158
  # param 'strengths' - strengths to include in plotting
@@ -835,8 +1167,10 @@ def nhl_plot_games(pbp,events,strengths,game_ids='all',marker_dict=event_markers
835
1167
 
836
1168
  print(f'Plotting the following games: {game_ids}...')
837
1169
 
838
- #Iterate through games, adding plot to list
839
- game_plots = [plot_game_events(pbp,game,events,strengths,marker_dict,team_colors,legend,xg) for game in game_ids]
1170
+ game_plots = {}
1171
+ #Iterate through games, adding plot to dict
1172
+ for game in game_ids:
1173
+ game_plots.update({game:[plot_game_events(pbp,game,events,strengths,marker_dict,team_colors,legend)]})
840
1174
 
841
1175
  #Return: dict of plotted game events
842
1176
  return game_plots
@@ -872,7 +1206,7 @@ def repo_load_pbp(seasons = []):
872
1206
 
873
1207
  #Add parquet to total
874
1208
  print(f'Loading play-by-play from the following seasons: {seasons}...')
875
- dfs = [pd.read_parquet(f"https://github.com/owensingh38/wsba_hockey/raw/refs/heads/main/src/wsba_hockey/pbp/parquet/nhl_pbp_{season}.parquet") for season in seasons]
1209
+ dfs = [pd.read_parquet(f"https://f005.backblazeb2.com/file/weakside-breakout/pbp/{season}.parquet") for season in seasons]
876
1210
 
877
1211
  return pd.concat(dfs)
878
1212
 
@@ -880,9 +1214,3 @@ def repo_load_seasons():
880
1214
  #List of available seasons to scrape
881
1215
 
882
1216
  return seasons
883
-
884
- def admin_convert_to_parquet(seasons):
885
- for season in seasons:
886
- load = pd.read_csv(f'pbp/csv/nhl_pbp_{season}.csv')
887
-
888
- load.to_parquet(f'pbp/parquet/nhl_pbp_{season}.parquet',index=False)