wsba-hockey 1.0.3__py3-none-any.whl → 1.0.5__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (131)
  1. wsba_hockey/__init__.py +1 -1
  2. wsba_hockey/data_pipelines.py +183 -0
  3. wsba_hockey/evidence/weakside-breakout/node_modules/duckdb/vendor.py +146 -0
  4. wsba_hockey/evidence/weakside-breakout/node_modules/flatted/python/flatted.py +149 -0
  5. wsba_hockey/evidence/weakside-breakout/node_modules/flatted/python/test.py +63 -0
  6. wsba_hockey/evidence/weakside-breakout/node_modules/node-gyp/gyp/gyp_main.py +45 -0
  7. wsba_hockey/evidence/weakside-breakout/node_modules/node-gyp/gyp/pylib/gyp/MSVSNew.py +367 -0
  8. wsba_hockey/evidence/weakside-breakout/node_modules/node-gyp/gyp/pylib/gyp/MSVSProject.py +206 -0
  9. wsba_hockey/evidence/weakside-breakout/node_modules/node-gyp/gyp/pylib/gyp/MSVSSettings.py +1270 -0
  10. wsba_hockey/evidence/weakside-breakout/node_modules/node-gyp/gyp/pylib/gyp/MSVSSettings_test.py +1547 -0
  11. wsba_hockey/evidence/weakside-breakout/node_modules/node-gyp/gyp/pylib/gyp/MSVSToolFile.py +59 -0
  12. wsba_hockey/evidence/weakside-breakout/node_modules/node-gyp/gyp/pylib/gyp/MSVSUserFile.py +153 -0
  13. wsba_hockey/evidence/weakside-breakout/node_modules/node-gyp/gyp/pylib/gyp/MSVSUtil.py +271 -0
  14. wsba_hockey/evidence/weakside-breakout/node_modules/node-gyp/gyp/pylib/gyp/MSVSVersion.py +574 -0
  15. wsba_hockey/evidence/weakside-breakout/node_modules/node-gyp/gyp/pylib/gyp/__init__.py +690 -0
  16. wsba_hockey/evidence/weakside-breakout/node_modules/node-gyp/gyp/pylib/gyp/common.py +661 -0
  17. wsba_hockey/evidence/weakside-breakout/node_modules/node-gyp/gyp/pylib/gyp/common_test.py +78 -0
  18. wsba_hockey/evidence/weakside-breakout/node_modules/node-gyp/gyp/pylib/gyp/easy_xml.py +165 -0
  19. wsba_hockey/evidence/weakside-breakout/node_modules/node-gyp/gyp/pylib/gyp/easy_xml_test.py +109 -0
  20. wsba_hockey/evidence/weakside-breakout/node_modules/node-gyp/gyp/pylib/gyp/flock_tool.py +55 -0
  21. wsba_hockey/evidence/weakside-breakout/node_modules/node-gyp/gyp/pylib/gyp/generator/__init__.py +0 -0
  22. wsba_hockey/evidence/weakside-breakout/node_modules/node-gyp/gyp/pylib/gyp/generator/analyzer.py +808 -0
  23. wsba_hockey/evidence/weakside-breakout/node_modules/node-gyp/gyp/pylib/gyp/generator/android.py +1173 -0
  24. wsba_hockey/evidence/weakside-breakout/node_modules/node-gyp/gyp/pylib/gyp/generator/cmake.py +1321 -0
  25. wsba_hockey/evidence/weakside-breakout/node_modules/node-gyp/gyp/pylib/gyp/generator/compile_commands_json.py +120 -0
  26. wsba_hockey/evidence/weakside-breakout/node_modules/node-gyp/gyp/pylib/gyp/generator/dump_dependency_json.py +103 -0
  27. wsba_hockey/evidence/weakside-breakout/node_modules/node-gyp/gyp/pylib/gyp/generator/eclipse.py +464 -0
  28. wsba_hockey/evidence/weakside-breakout/node_modules/node-gyp/gyp/pylib/gyp/generator/gypd.py +89 -0
  29. wsba_hockey/evidence/weakside-breakout/node_modules/node-gyp/gyp/pylib/gyp/generator/gypsh.py +58 -0
  30. wsba_hockey/evidence/weakside-breakout/node_modules/node-gyp/gyp/pylib/gyp/generator/make.py +2714 -0
  31. wsba_hockey/evidence/weakside-breakout/node_modules/node-gyp/gyp/pylib/gyp/generator/msvs.py +3981 -0
  32. wsba_hockey/evidence/weakside-breakout/node_modules/node-gyp/gyp/pylib/gyp/generator/msvs_test.py +44 -0
  33. wsba_hockey/evidence/weakside-breakout/node_modules/node-gyp/gyp/pylib/gyp/generator/ninja.py +2936 -0
  34. wsba_hockey/evidence/weakside-breakout/node_modules/node-gyp/gyp/pylib/gyp/generator/ninja_test.py +55 -0
  35. wsba_hockey/evidence/weakside-breakout/node_modules/node-gyp/gyp/pylib/gyp/generator/xcode.py +1394 -0
  36. wsba_hockey/evidence/weakside-breakout/node_modules/node-gyp/gyp/pylib/gyp/generator/xcode_test.py +25 -0
  37. wsba_hockey/evidence/weakside-breakout/node_modules/node-gyp/gyp/pylib/gyp/input.py +3130 -0
  38. wsba_hockey/evidence/weakside-breakout/node_modules/node-gyp/gyp/pylib/gyp/input_test.py +98 -0
  39. wsba_hockey/evidence/weakside-breakout/node_modules/node-gyp/gyp/pylib/gyp/mac_tool.py +771 -0
  40. wsba_hockey/evidence/weakside-breakout/node_modules/node-gyp/gyp/pylib/gyp/msvs_emulation.py +1271 -0
  41. wsba_hockey/evidence/weakside-breakout/node_modules/node-gyp/gyp/pylib/gyp/ninja_syntax.py +174 -0
  42. wsba_hockey/evidence/weakside-breakout/node_modules/node-gyp/gyp/pylib/gyp/simple_copy.py +61 -0
  43. wsba_hockey/evidence/weakside-breakout/node_modules/node-gyp/gyp/pylib/gyp/win_tool.py +374 -0
  44. wsba_hockey/evidence/weakside-breakout/node_modules/node-gyp/gyp/pylib/gyp/xcode_emulation.py +1939 -0
  45. wsba_hockey/evidence/weakside-breakout/node_modules/node-gyp/gyp/pylib/gyp/xcode_ninja.py +302 -0
  46. wsba_hockey/evidence/weakside-breakout/node_modules/node-gyp/gyp/pylib/gyp/xcodeproj_file.py +3197 -0
  47. wsba_hockey/evidence/weakside-breakout/node_modules/node-gyp/gyp/pylib/gyp/xml_fix.py +65 -0
  48. wsba_hockey/evidence/weakside-breakout/node_modules/node-gyp/gyp/test_gyp.py +261 -0
  49. wsba_hockey/evidence/weakside-breakout/node_modules/node-gyp/gyp/tools/graphviz.py +102 -0
  50. wsba_hockey/evidence/weakside-breakout/node_modules/node-gyp/gyp/tools/pretty_gyp.py +156 -0
  51. wsba_hockey/evidence/weakside-breakout/node_modules/node-gyp/gyp/tools/pretty_sln.py +181 -0
  52. wsba_hockey/evidence/weakside-breakout/node_modules/node-gyp/gyp/tools/pretty_vcproj.py +339 -0
  53. wsba_hockey/evidence/weakside-breakout/node_modules/node-gyp/test/fixtures/test-charmap.py +31 -0
  54. wsba_hockey/evidence/weakside-breakout/node_modules/node-gyp/update-gyp.py +64 -0
  55. wsba_hockey/evidence/weakside-breakout/node_modules/sqlite3/node_modules/node-gyp/gyp/gyp_main.py +45 -0
  56. wsba_hockey/evidence/weakside-breakout/node_modules/sqlite3/node_modules/node-gyp/gyp/pylib/gyp/MSVSNew.py +367 -0
  57. wsba_hockey/evidence/weakside-breakout/node_modules/sqlite3/node_modules/node-gyp/gyp/pylib/gyp/MSVSProject.py +206 -0
  58. wsba_hockey/evidence/weakside-breakout/node_modules/sqlite3/node_modules/node-gyp/gyp/pylib/gyp/MSVSSettings.py +1270 -0
  59. wsba_hockey/evidence/weakside-breakout/node_modules/sqlite3/node_modules/node-gyp/gyp/pylib/gyp/MSVSSettings_test.py +1547 -0
  60. wsba_hockey/evidence/weakside-breakout/node_modules/sqlite3/node_modules/node-gyp/gyp/pylib/gyp/MSVSToolFile.py +59 -0
  61. wsba_hockey/evidence/weakside-breakout/node_modules/sqlite3/node_modules/node-gyp/gyp/pylib/gyp/MSVSUserFile.py +153 -0
  62. wsba_hockey/evidence/weakside-breakout/node_modules/sqlite3/node_modules/node-gyp/gyp/pylib/gyp/MSVSUtil.py +271 -0
  63. wsba_hockey/evidence/weakside-breakout/node_modules/sqlite3/node_modules/node-gyp/gyp/pylib/gyp/MSVSVersion.py +574 -0
  64. wsba_hockey/evidence/weakside-breakout/node_modules/sqlite3/node_modules/node-gyp/gyp/pylib/gyp/__init__.py +666 -0
  65. wsba_hockey/evidence/weakside-breakout/node_modules/sqlite3/node_modules/node-gyp/gyp/pylib/gyp/common.py +654 -0
  66. wsba_hockey/evidence/weakside-breakout/node_modules/sqlite3/node_modules/node-gyp/gyp/pylib/gyp/common_test.py +78 -0
  67. wsba_hockey/evidence/weakside-breakout/node_modules/sqlite3/node_modules/node-gyp/gyp/pylib/gyp/easy_xml.py +165 -0
  68. wsba_hockey/evidence/weakside-breakout/node_modules/sqlite3/node_modules/node-gyp/gyp/pylib/gyp/easy_xml_test.py +109 -0
  69. wsba_hockey/evidence/weakside-breakout/node_modules/sqlite3/node_modules/node-gyp/gyp/pylib/gyp/flock_tool.py +55 -0
  70. wsba_hockey/evidence/weakside-breakout/node_modules/sqlite3/node_modules/node-gyp/gyp/pylib/gyp/generator/__init__.py +0 -0
  71. wsba_hockey/evidence/weakside-breakout/node_modules/sqlite3/node_modules/node-gyp/gyp/pylib/gyp/generator/analyzer.py +808 -0
  72. wsba_hockey/evidence/weakside-breakout/node_modules/sqlite3/node_modules/node-gyp/gyp/pylib/gyp/generator/android.py +1173 -0
  73. wsba_hockey/evidence/weakside-breakout/node_modules/sqlite3/node_modules/node-gyp/gyp/pylib/gyp/generator/cmake.py +1321 -0
  74. wsba_hockey/evidence/weakside-breakout/node_modules/sqlite3/node_modules/node-gyp/gyp/pylib/gyp/generator/compile_commands_json.py +120 -0
  75. wsba_hockey/evidence/weakside-breakout/node_modules/sqlite3/node_modules/node-gyp/gyp/pylib/gyp/generator/dump_dependency_json.py +103 -0
  76. wsba_hockey/evidence/weakside-breakout/node_modules/sqlite3/node_modules/node-gyp/gyp/pylib/gyp/generator/eclipse.py +464 -0
  77. wsba_hockey/evidence/weakside-breakout/node_modules/sqlite3/node_modules/node-gyp/gyp/pylib/gyp/generator/gypd.py +89 -0
  78. wsba_hockey/evidence/weakside-breakout/node_modules/sqlite3/node_modules/node-gyp/gyp/pylib/gyp/generator/gypsh.py +58 -0
  79. wsba_hockey/evidence/weakside-breakout/node_modules/sqlite3/node_modules/node-gyp/gyp/pylib/gyp/generator/make.py +2518 -0
  80. wsba_hockey/evidence/weakside-breakout/node_modules/sqlite3/node_modules/node-gyp/gyp/pylib/gyp/generator/msvs.py +3978 -0
  81. wsba_hockey/evidence/weakside-breakout/node_modules/sqlite3/node_modules/node-gyp/gyp/pylib/gyp/generator/msvs_test.py +44 -0
  82. wsba_hockey/evidence/weakside-breakout/node_modules/sqlite3/node_modules/node-gyp/gyp/pylib/gyp/generator/ninja.py +2936 -0
  83. wsba_hockey/evidence/weakside-breakout/node_modules/sqlite3/node_modules/node-gyp/gyp/pylib/gyp/generator/ninja_test.py +55 -0
  84. wsba_hockey/evidence/weakside-breakout/node_modules/sqlite3/node_modules/node-gyp/gyp/pylib/gyp/generator/xcode.py +1394 -0
  85. wsba_hockey/evidence/weakside-breakout/node_modules/sqlite3/node_modules/node-gyp/gyp/pylib/gyp/generator/xcode_test.py +25 -0
  86. wsba_hockey/evidence/weakside-breakout/node_modules/sqlite3/node_modules/node-gyp/gyp/pylib/gyp/input.py +3137 -0
  87. wsba_hockey/evidence/weakside-breakout/node_modules/sqlite3/node_modules/node-gyp/gyp/pylib/gyp/input_test.py +98 -0
  88. wsba_hockey/evidence/weakside-breakout/node_modules/sqlite3/node_modules/node-gyp/gyp/pylib/gyp/mac_tool.py +771 -0
  89. wsba_hockey/evidence/weakside-breakout/node_modules/sqlite3/node_modules/node-gyp/gyp/pylib/gyp/msvs_emulation.py +1271 -0
  90. wsba_hockey/evidence/weakside-breakout/node_modules/sqlite3/node_modules/node-gyp/gyp/pylib/gyp/ninja_syntax.py +174 -0
  91. wsba_hockey/evidence/weakside-breakout/node_modules/sqlite3/node_modules/node-gyp/gyp/pylib/gyp/simple_copy.py +61 -0
  92. wsba_hockey/evidence/weakside-breakout/node_modules/sqlite3/node_modules/node-gyp/gyp/pylib/gyp/win_tool.py +374 -0
  93. wsba_hockey/evidence/weakside-breakout/node_modules/sqlite3/node_modules/node-gyp/gyp/pylib/gyp/xcode_emulation.py +1939 -0
  94. wsba_hockey/evidence/weakside-breakout/node_modules/sqlite3/node_modules/node-gyp/gyp/pylib/gyp/xcode_ninja.py +302 -0
  95. wsba_hockey/evidence/weakside-breakout/node_modules/sqlite3/node_modules/node-gyp/gyp/pylib/gyp/xcodeproj_file.py +3197 -0
  96. wsba_hockey/evidence/weakside-breakout/node_modules/sqlite3/node_modules/node-gyp/gyp/pylib/gyp/xml_fix.py +65 -0
  97. wsba_hockey/evidence/weakside-breakout/node_modules/sqlite3/node_modules/node-gyp/gyp/setup.py +42 -0
  98. wsba_hockey/evidence/weakside-breakout/node_modules/sqlite3/node_modules/node-gyp/gyp/test_gyp.py +260 -0
  99. wsba_hockey/evidence/weakside-breakout/node_modules/sqlite3/node_modules/node-gyp/gyp/tools/graphviz.py +102 -0
  100. wsba_hockey/evidence/weakside-breakout/node_modules/sqlite3/node_modules/node-gyp/gyp/tools/pretty_gyp.py +156 -0
  101. wsba_hockey/evidence/weakside-breakout/node_modules/sqlite3/node_modules/node-gyp/gyp/tools/pretty_sln.py +181 -0
  102. wsba_hockey/evidence/weakside-breakout/node_modules/sqlite3/node_modules/node-gyp/gyp/tools/pretty_vcproj.py +339 -0
  103. wsba_hockey/evidence/weakside-breakout/node_modules/sqlite3/node_modules/node-gyp/test/fixtures/test-charmap.py +31 -0
  104. wsba_hockey/evidence/weakside-breakout/node_modules/sqlite3/node_modules/node-gyp/update-gyp.py +46 -0
  105. wsba_hockey/evidence/weakside-breakout/wsba_nhl_apps/wsba_nhl_apps/game_stats/app.py +400 -0
  106. wsba_hockey/evidence/weakside-breakout/wsba_nhl_apps/wsba_nhl_apps/game_stats/name_fix.py +47 -0
  107. wsba_hockey/evidence/weakside-breakout/wsba_nhl_apps/wsba_nhl_apps/heatmaps/app.py +108 -0
  108. wsba_hockey/evidence/weakside-breakout/wsba_nhl_apps/wsba_nhl_apps/heatmaps/plot.py +93 -0
  109. wsba_hockey/evidence/weakside-breakout/wsba_nhl_apps/wsba_nhl_apps/heatmaps/rink_plot.py +245 -0
  110. wsba_hockey/evidence/weakside-breakout/wsba_nhl_apps/wsba_nhl_apps/matchups/app.py +145 -0
  111. wsba_hockey/evidence/weakside-breakout/wsba_nhl_apps/wsba_nhl_apps/matchups/plot.py +77 -0
  112. wsba_hockey/evidence/weakside-breakout/wsba_nhl_apps/wsba_nhl_apps/matchups/rink_plot.py +245 -0
  113. wsba_hockey/evidence/weakside-breakout/wsba_nhl_apps/wsba_nhl_apps/pbp/app.py +389 -0
  114. wsba_hockey/evidence/weakside-breakout/wsba_nhl_apps/wsba_nhl_apps/pbp/plot.py +70 -0
  115. wsba_hockey/evidence/weakside-breakout/wsba_nhl_apps/wsba_nhl_apps/pbp/rink_plot.py +245 -0
  116. wsba_hockey/evidence/weakside-breakout/wsba_nhl_apps/wsba_nhl_apps/skater/app.py +110 -0
  117. wsba_hockey/evidence/weakside-breakout/wsba_nhl_apps/wsba_nhl_apps/skater/plot.py +58 -0
  118. wsba_hockey/evidence/weakside-breakout/wsba_nhl_apps/wsba_nhl_apps/skater/rink_plot.py +245 -0
  119. wsba_hockey/tools/agg.py +243 -54
  120. wsba_hockey/tools/plotting.py +25 -25
  121. wsba_hockey/tools/scraping.py +154 -263
  122. wsba_hockey/tools/xg_model.py +369 -315
  123. wsba_hockey/workspace.py +22 -117
  124. wsba_hockey/wsba_main.py +499 -167
  125. {wsba_hockey-1.0.3.dist-info → wsba_hockey-1.0.5.dist-info}/METADATA +1 -1
  126. wsba_hockey-1.0.5.dist-info/RECORD +135 -0
  127. {wsba_hockey-1.0.3.dist-info → wsba_hockey-1.0.5.dist-info}/WHEEL +1 -1
  128. wsba_hockey/stats/calculate_viz/shot_impact.py +0 -2
  129. wsba_hockey-1.0.3.dist-info/RECORD +0 -19
  130. {wsba_hockey-1.0.3.dist-info → wsba_hockey-1.0.5.dist-info}/licenses/LICENSE +0 -0
  131. {wsba_hockey-1.0.3.dist-info → wsba_hockey-1.0.5.dist-info}/top_level.txt +0 -0
wsba_hockey/wsba_main.py CHANGED
@@ -1,9 +1,9 @@
1
+ import random
2
+ import os
1
3
  import requests as rs
2
4
  import pandas as pd
3
- import numpy as np
4
- from datetime import datetime, timedelta, date
5
5
  import time
6
- import random
6
+ from datetime import datetime, timedelta, date
7
7
  from .tools.scraping import *
8
8
  from .tools.xg_model import *
9
9
  from .tools.agg import *
@@ -59,11 +59,11 @@ convert_team_abbr = {'L.A':'LAK',
59
59
  'T.B':'TBL',
60
60
  'PHX':'ARI'}
61
61
 
62
- per_sixty = ['Fi','xGi','Gi','A1','A2','P1','P','FF','FA','xGF','xGA','GF','GA']
62
+ per_sixty = ['Fi','xGi','Gi','A1','A2','P1','P','OZF','NZF','DZF','FF','FA','xGF','xGA','GF','GA','CF','CA','HF','HA','Give','Take','Penl','Penl2','Penl5','Draw','Block']
63
63
 
64
64
  #Some games in the API are specifically known to cause errors in scraping.
65
65
  #This list is updated as frequently as necessary
66
- known_probs ={
66
+ known_probs = {
67
67
  '2007020011':'Missing shifts data for game between Chicago and Minnesota.',
68
68
  '2007021178':'Game between the Bruins and Sabres is missing data after the second period, for some reason.',
69
69
  '2008020259':'HTML data is completely missing for this game.',
@@ -73,15 +73,12 @@ known_probs ={
73
73
  '2009020658':'Missing shifts data for game between New York Islanders and Dallas.',
74
74
  '2009020885':'Missing shifts data for game between Sharks and Blue Jackets.',
75
75
  '2010020124':'Game between Capitals and Hurricanes is sporadically missing player on-ice data',
76
+ '2012020018':'HTML events contain mislabeled events.',
76
77
  '2013020971':'On March 10th, 2014, Stars forward Rich Peverley suffered from a cardiac episode midgame and as a result, the remainder of the game was postponed. \nThe game resumed on April 9th, and the only goal scorer in the game, Blue Jackets forward Nathan Horton, did not appear in the resumed game due to injury. Interestingly, Horton would never play in the NHL again.',
77
78
  '2018021133':'Game between Lightning and Capitals has incorrectly labeled event teams (i.e. WSH TAKEAWAY - #71 CIRELLI (Cirelli is a Tampa Bay skater in this game)).',
78
79
  '2019020876':'Due to the frightening collapse of Blues defensemen Jay Bouwmeester, a game on February 2nd, 2020 between the Ducks and Blues was postponed. \nWhen the game resumed, Ducks defensemen Hampus Lindholm, who assisted on a goal in the inital game, did not play in the resumed match.'
79
80
  }
80
81
 
81
- name_change = {
82
- "":"",
83
- }
84
-
85
82
  shot_types = ['wrist','deflected','tip-in','slap','backhand','snap','wrap-around','poke','bat','cradle','between-legs']
86
83
 
87
84
  new = 2024
@@ -107,14 +104,22 @@ standings_end = {
107
104
  '20242025':'04-17'
108
105
  }
109
106
 
107
+ events = ['faceoff','hit','giveaway','takeaway','blocked-shot','missed-shot','shot-on-goal','goal','penalty']
108
+
109
+ dir = os.path.dirname(os.path.realpath(__file__))
110
+ schedule_path = os.path.join(dir,'tools\\schedule\\schedule.csv')
111
+ info_path = os.path.join(dir,'tools\\teaminfo\\nhl_teaminfo.csv')
112
+ default_roster = os.path.join(dir,'tools\\rosters\\nhl_rosters.csv')
113
+
110
114
  ## SCRAPE FUNCTIONS ##
111
- def nhl_scrape_game(game_ids,split_shifts = False, remove = ['period-start','period-end','challenge','stoppage'],verbose = False, errors = False):
115
+ def nhl_scrape_game(game_ids,split_shifts = False, remove = ['period-start','period-end','challenge','stoppage','shootout-complete','game-end'],verbose = False, sources = False, errors = False):
112
116
  #Given a set of game_ids (NHL API), return complete play-by-play information as requested
113
117
  # param 'game_ids' - NHL game ids (or list formatted as ['random', num_of_games, start_year, end_year])
114
118
  # param 'split_shifts' - boolean which splits pbp and shift events if true
115
119
  # param 'remove' - list of events to remove from final dataframe
116
120
  # param 'xg' - xG model to apply to pbp for aggregation
117
121
  # param 'verbose' - boolean which adds additional event info if true
122
+ # param 'sources - boolean scraping the html and json sources to a master directory if true
118
123
  # param 'errors' - boolean returning game ids which did not scrape if true
119
124
 
120
125
  pbps = []
@@ -154,6 +159,7 @@ def nhl_scrape_game(game_ids,split_shifts = False, remove = ['period-start','per
154
159
  #Scrape each game
155
160
  #Track Errors
156
161
  error_ids = []
162
+ prog = 0
157
163
  for game_id in game_ids:
158
164
  print("Scraping data from game " + str(game_id) + "...",end="")
159
165
  start = time.perf_counter()
@@ -161,15 +167,25 @@ def nhl_scrape_game(game_ids,split_shifts = False, remove = ['period-start','per
161
167
  try:
162
168
  #Retrieve data
163
169
  info = get_game_info(game_id)
164
- data = combine_data(info)
170
+ data = combine_data(info, sources)
165
171
 
166
172
  #Append data to list
167
173
  pbps.append(data)
168
174
 
169
175
  end = time.perf_counter()
170
176
  secs = end - start
171
- print(f" finished in {secs:.2f} seconds.")
177
+ prog += 1
178
+
179
+ #Export if sources is true
180
+ if sources:
181
+ dirs = f'sources/{info['season']}/'
182
+
183
+ if not os.path.exists(dirs):
184
+ os.makedirs(dirs)
172
185
 
186
+ data.to_csv(f'{dirs}{info['game_id']}.csv',index=False)
187
+
188
+ print(f" finished in {secs:.2f} seconds. {prog}/{len(game_ids)} ({(prog/len(game_ids))*100:.2f}%)")
173
189
  except:
174
190
  #Games such as the all-star game and pre-season games will incur this error
175
191
  #Other games have known problems
@@ -180,7 +196,7 @@ def nhl_scrape_game(game_ids,split_shifts = False, remove = ['period-start','per
180
196
 
181
197
  #Track error
182
198
  error_ids.append(game_id)
183
-
199
+
184
200
  #Add all pbps together
185
201
  if len(pbps) == 0:
186
202
  print("\rNo data returned.")
@@ -256,30 +272,37 @@ def nhl_scrape_schedule(season,start = "09-01", end = "08-01"):
256
272
  #Handles dates which are over a year apart
257
273
  day = 365 + day
258
274
  for i in range(day):
259
- #For each day, call NHL api and retreive id, season, season_type (1,2,3), and gamecenter link
275
+ #For each day, call NHL api and retreive info on all games of selected game
260
276
  inc = start+timedelta(days=i)
261
277
  print("Scraping games on " + str(inc)[:10]+"...")
262
278
 
263
279
  get = rs.get(api+str(inc)[:10]).json()
264
- gameWeek = list(pd.json_normalize(get['gameWeek'])['games'])[0]
265
-
266
- for i in range(0,len(gameWeek)):
267
- game.append(pd.DataFrame({
268
- "id": [gameWeek[i]['id']],
269
- "season": [gameWeek[i]['season']],
270
- "season_type":[gameWeek[i]['gameType']],
271
- "away_team_abbr":[gameWeek[i]['awayTeam']['abbrev']],
272
- "home_team_abbr":[gameWeek[i]['homeTeam']['abbrev']],
273
- "gamecenter_link":[gameWeek[i]['gameCenterLink']]
274
- }))
275
-
280
+ gameWeek = pd.json_normalize(list(pd.json_normalize(get['gameWeek'])['games'])[0])
281
+
282
+ #Return nothing if there's nothing
283
+ if gameWeek.empty:
284
+ game.append(gameWeek)
285
+ else:
286
+ gameWeek['date'] = get['gameWeek'][0]['date']
287
+
288
+ gameWeek['season_type'] = gameWeek['gameType']
289
+ gameWeek['away_team_abbr'] = gameWeek['awayTeam.abbrev']
290
+ gameWeek['home_team_abbr'] = gameWeek['homeTeam.abbrev']
291
+ gameWeek['game_title'] = gameWeek['away_team_abbr'] + " @ " + gameWeek['home_team_abbr'] + " - " + gameWeek['date']
292
+ gameWeek['estStartTime'] = pd.to_datetime(gameWeek['startTimeUTC']).dt.tz_convert('US/Eastern').dt.strftime("%I:%M %p")
293
+
294
+ front_col = ['id','season','date','season_type','game_title','away_team_abbr','home_team_abbr','estStartTime']
295
+ gameWeek = gameWeek[front_col+[col for col in gameWeek.columns.to_list() if col not in front_col]]
296
+
297
+ game.append(gameWeek)
298
+
276
299
  #Concatenate all games
277
300
  df = pd.concat(game)
278
301
 
279
302
  #Return: specificed schedule data
280
303
  return df
281
304
 
282
- def nhl_scrape_season(season,split_shifts = False, season_types = [2,3], remove = ['period-start','period-end','game-end','challenge','stoppage'], start = "09-01", end = "08-01", local=False, local_path = "schedule/schedule.csv", verbose = False, errors = False):
305
+ def nhl_scrape_season(season,split_shifts = False, season_types = [2,3], remove = ['period-start','period-end','game-end','challenge','stoppage'], start = "09-01", end = "08-01", local=False, local_path = schedule_path, verbose = False, sources = False, errors = False):
283
306
  #Given season, scrape all play-by-play occuring within the season
284
307
  # param 'season' - NHL season to scrape
285
308
  # param 'split_shifts' - boolean which splits pbp and shift events if true
@@ -289,12 +312,21 @@ def nhl_scrape_season(season,split_shifts = False, season_types = [2,3], remove
289
312
  # param 'local' - boolean indicating whether to use local file to scrape game_ids
290
313
  # param 'local_path' - path of local file
291
314
  # param 'verbose' - boolean which adds additional event info if true
315
+ # param 'sources - boolean scraping the html and json sources to a master directory if true
292
316
  # param 'errors' - boolean returning game ids which did not scrape if true
293
317
 
294
318
  #Determine whether to use schedule data in repository or to scrape
295
- if local == True:
319
+ if local:
296
320
  load = pd.read_csv(local_path)
297
- load = load.loc[(load['season'].astype(str)==season)&(load['season_type'].isin(season_types))]
321
+ load['date'] = pd.to_datetime(load['date'])
322
+
323
+ start = f'{(season[0:4] if int(start[0:2])>=9 else season[4:8])}-{int(start[0:2])}-{int(start[3:5])}'
324
+ end = f'{(season[0:4] if int(end[0:2])>=9 else season[4:8])}-{int(end[0:2])}-{int(end[3:5])}'
325
+
326
+ load = load.loc[(load['season'].astype(str)==season)&
327
+ (load['season_type'].isin(season_types))&
328
+ (load['date']>=start)&(load['date']<=end)]
329
+
298
330
  game_ids = list(load['id'].astype(str))
299
331
  else:
300
332
  load = nhl_scrape_schedule(season,start,end)
@@ -310,31 +342,17 @@ def nhl_scrape_season(season,split_shifts = False, season_types = [2,3], remove
310
342
  start = time.perf_counter()
311
343
 
312
344
  #Perform scrape
313
- if split_shifts == True:
314
- data = nhl_scrape_game(game_ids,split_shifts=True,remove=remove,verbose=verbose,errors=errors)
345
+ if split_shifts:
346
+ data = nhl_scrape_game(game_ids,split_shifts=True,remove=remove,verbose=verbose,sources=sources,errors=errors)
315
347
  else:
316
- data = nhl_scrape_game(game_ids,remove=remove,verbose=verbose,errors=errors)
348
+ data = nhl_scrape_game(game_ids,remove=remove,verbose=verbose,sources=sources,errors=errors)
317
349
 
318
350
  end = time.perf_counter()
319
351
  secs = end - start
320
352
 
321
353
  print(f'Finished season scrape in {(secs/60)/60:.2f} hours.')
322
354
  #Return: Complete pbp and shifts data for specified season as well as dataframe of game_ids which failed to return data
323
- if split_shifts == True:
324
- pbp_dict = {'pbp':data['pbp'],
325
- 'shifts':data['shifts']}
326
-
327
- if errors:
328
- pbp_dict.update({'errors':data['errors']})
329
- return pbp_dict
330
- else:
331
- pbp = data
332
- if errors:
333
- pbp_dict = {'pbp':pbp,
334
- 'errors':data['errors']}
335
- return pbp_dict
336
- else:
337
- return pbp
355
+ return data
338
356
 
339
357
  def nhl_scrape_seasons_info(seasons = []):
340
358
  #Returns info related to NHL seasons (by default, all seasons are included)
@@ -390,7 +408,7 @@ def nhl_scrape_roster(season):
390
408
  #Given a nhl season, return rosters for all participating teams
391
409
  # param 'season' - NHL season to scrape
392
410
  print("Scrpaing rosters for the "+ season + "season...")
393
- teaminfo = pd.read_csv("teaminfo/nhl_teaminfo.csv")
411
+ teaminfo = pd.read_csv(info_path)
394
412
 
395
413
  rosts = []
396
414
  for team in list(teaminfo['Team']):
@@ -449,17 +467,24 @@ def nhl_scrape_team_info(country = False):
449
467
 
450
468
  return data.sort_values(by=(['country3Code','countryCode','iocCode','countryName'] if country else ['fullName','triCode','id']))
451
469
 
452
- def nhl_scrape_player_data(player_id):
470
+ def nhl_scrape_player_data(player_ids):
453
471
  #Given player id, return player information
454
- api = f'https://api-web.nhle.com/v1/player/{player_id}/landing'
472
+ infos = []
473
+ for player_id in player_ids:
474
+ player_id = int(player_id)
475
+ api = f'https://api-web.nhle.com/v1/player/{player_id}/landing'
476
+
477
+ data = pd.json_normalize(rs.get(api).json())
455
478
 
456
- data = pd.json_normalize(rs.get(api).json())
479
+ #Add name column
480
+ data['fullName'] = (data['firstName.default'] + " " + data['lastName.default']).str.upper()
457
481
 
458
- #Add name column
459
- data['fullName'] = (data['firstName.default'] + " " + data['lastName.default']).str.upper()
482
+ #Append
483
+ infos.append(data)
460
484
 
485
+ df = pd.concat(infos)
461
486
  #Return: player data
462
- return data
487
+ return df
463
488
 
464
489
  def nhl_scrape_draft_rankings(arg = 'now', category = ''):
465
490
  #Given url argument for timeframe and prospect category, return draft rankings
@@ -478,12 +503,24 @@ def nhl_scrape_draft_rankings(arg = 'now', category = ''):
478
503
  #Return: prospect rankings
479
504
  return data
480
505
 
481
- def nhl_shooting_impacts(agg,team=False):
506
+ def nhl_apply_xG(pbp):
507
+ #Given play-by-play data, return this data with xG-related columns
508
+
509
+ #param 'pbp' - play-by-play data
510
+
511
+ print(f'Applying WSBA xG to model with seasons: {pbp['season'].drop_duplicates().to_list()}')
512
+
513
+ #Apply xG model
514
+ pbp = wsba_xG(pbp)
515
+
516
+ return pbp
517
+
518
+ def nhl_shooting_impacts(agg,type):
482
519
  #Given stats table generated from the nhl_calculate_stats function, return table with shot impacts
483
520
  #Only 5v5 is supported as of now
484
521
 
485
522
  #param 'agg' - stats table
486
- #param 'team' - boolean determining if team stats are calculated instead of skater stats
523
+ #param 'type' - type of stats to calculate ('skater', 'goalie', or 'team')
487
524
 
488
525
  #COMPOSITE IMPACT EVALUATIONS:
489
526
 
@@ -509,7 +546,7 @@ def nhl_shooting_impacts(agg,team=False):
509
546
 
510
547
  return rate+qual+fini
511
548
 
512
- if team:
549
+ if type == 'goalie':
513
550
  pos = agg
514
551
  for group in [('OOFF','F'),('ODEF','A')]:
515
552
  #Have to set this columns for compatibility with df.apply
@@ -536,35 +573,172 @@ def nhl_shooting_impacts(agg,team=False):
536
573
  pos[f'{group[0]}-SRI'] = pos['g'] - pos.apply(lambda x: goal_comp(avg_fen,x.xg_fen,x.xg,x.g,avg_fsh),axis=1)
537
574
  pos[f'{group[0]}-SQI'] = pos['g'] - pos.apply(lambda x: goal_comp(x.fenwick,avg_xg_fen,x.xg,x.g,avg_fsh),axis=1)
538
575
  pos[f'{group[0]}-FNI'] = pos['g'] - pos.apply(lambda x: goal_comp(x.fenwick,x.xg_fen,avg_xg,avg_g,avg_fsh),axis=1)
576
+
577
+ #Convert impacts to totals
578
+ #Calculate shot rate, shot quality, and finishing impacts
579
+ pos[f'{group[0]}-SRI-T'] = (pos[f'{group[0]}-SRI']/60)*pos['TOI']
580
+ pos[f'{group[0]}-SQI-T'] = (pos[f'{group[0]}-SQI']/60)*pos['TOI']
581
+ pos[f'{group[0]}-FNI-T'] = (pos[f'{group[0]}-FNI']/60)*pos['TOI']
539
582
 
583
+ #Rank per 60 stats
584
+ for stat in ['FF','FA','xGF','xGA','GF','GA','CF','CA','GSAx']:
585
+ pos[f'{stat}/60-P'] = pos[f'{stat}/60'].rank(pct=True)
586
+
587
+ #Flip percentiles for against stats
588
+ for stat in ['FA','xGA','GA','CA']:
589
+ pos[f'{stat}/60-P'] = 1-pos[f'{stat}/60-P']
590
+
540
591
  #Add extra metrics
541
592
  pos['RushF/60'] = (pos['RushF']/pos['TOI'])*60
542
593
  pos['RushA/60'] = (pos['RushA']/pos['TOI'])*60
543
- pos['Rushes FF'] = pos['RushF/60'].rank(pct=True)
544
- pos['Rushes FA'] = 1 - pos['RushA/60'].rank(pct=True)
594
+ pos['RushesFF'] = pos['RushF/60'].rank(pct=True)
595
+ pos['RushesFA'] = 1 - pos['RushA/60'].rank(pct=True)
545
596
  pos['RushFxG/60'] = (pos['RushFxG']/pos['TOI'])*60
546
597
  pos['RushAxG/60'] = (pos['RushAxG']/pos['TOI'])*60
547
- pos['Rushes xGF'] = pos['RushFxG/60'].rank(pct=True)
548
- pos['Rushes xGA'] = 1 - pos['RushAxG/60'].rank(pct=True)
598
+ pos['RushesxGF'] = pos['RushFxG/60'].rank(pct=True)
599
+ pos['RushesxGA'] = 1 - pos['RushAxG/60'].rank(pct=True)
549
600
  pos['RushFG/60'] = (pos['RushFG']/pos['TOI'])*60
550
601
  pos['RushAG/60'] = (pos['RushAG']/pos['TOI'])*60
551
- pos['Rushes GF'] = pos['RushFG/60'].rank(pct=True)
552
- pos['Rushes GA'] = 1 - pos['RushAG/60'].rank(pct=True)
602
+ pos['RushesGF'] = pos['RushFG/60'].rank(pct=True)
603
+ pos['RushesGA'] = 1 - pos['RushAG/60'].rank(pct=True)
553
604
 
554
605
  #Flip against metric percentiles
555
606
  pos['ODEF-SR'] = 1-pos['ODEF-SR']
556
607
  pos['ODEF-SQ'] = 1-pos['ODEF-SQ']
557
608
  pos['ODEF-FN'] = 1-pos['ODEF-FN']
558
609
 
610
+ #Extraneous Values
611
+ pos['EGF'] = pos['OOFF-SRI']+pos['OOFF-SQI']+pos['OOFF-FNI']
612
+ pos['ExGF'] = pos['OOFF-SRI']+pos['OOFF-SQI']
613
+ pos['EGA'] = pos['ODEF-SRI']+pos['ODEF-SQI']+pos['ODEF-FNI']
614
+ pos['ExGA'] = pos['ODEF-SRI']+pos['ODEF-SQI']
615
+
616
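+ #...and their percentiles (NOTE(review): the flip below assigns 1-pos['EGA'] and 1-pos['ExGA'], i.e. the raw values rather than the ranks just computed; 1-pos['EGA-P'] and 1-pos['ExGA-P'] look intended - confirm)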
617
+ pos['EGF-P'] = pos['EGF'].rank(pct=True)
618
+ pos['ExGF-P'] = pos['ExGF'].rank(pct=True)
619
+ pos['EGA-P'] = pos['EGA'].rank(pct=True)
620
+ pos['ExGA-P'] = pos['ExGA'].rank(pct=True)
621
+
622
+ pos['EGA-P'] = 1-pos['EGA']
623
+ pos['ExGA-P'] = 1-pos['ExGA']
624
+
625
+ #...and then their totals
626
+ pos['EGF-T'] = (pos['EGF']/60)*pos['TOI']
627
+ pos['ExGF-T'] = (pos['ExGF']/60)*pos['TOI']
628
+ pos['EGA-T'] = (pos['EGA']/60)*pos['TOI']
629
+ pos['ExGA-T'] = (pos['ExGA']/60)*pos['TOI']
630
+
631
+ #Goal Composites...
632
+ pos['Team-Adjusted-EGI'] = pos['ODEF-FNI']-pos['ExGA']
633
+ pos['GISAx'] = pos['ExGA']-pos['EGA']
634
+ pos['NetGI'] = pos['EGF'] - pos['EGA']
635
+ pos['NetxGI'] = pos['ExGF'] - pos['ExGA']
636
+
637
+ #...and their percentiles
638
+ pos['Team-Adjusted-EGI-P'] = pos['Team-Adjusted-EGI'].rank(pct=True)
639
+ pos['GISAx-P'] = pos['GISAx'].rank(pct=True)
640
+ pos['NetGI-P'] = pos['NetGI'].rank(pct=True)
641
+ pos['NetxGI-P'] = pos['NetxGI'].rank(pct=True)
642
+
643
+ #...and then their totals
644
+ pos['Team-Adjusted-EGI-T'] = (pos['Team-Adjusted-EGI']/60)*pos['TOI']
645
+ pos['GISAx-T'] = (pos['GISAx']/60)*pos['TOI']
646
+ pos['NetGI-T'] = (pos['NetGI']/60)*pos['TOI']
647
+ pos['NetxGI-T'] = (pos['NetxGI']/60)*pos['TOI']
648
+
559
649
  #Return: goalie stats with shooting impacts
560
- return pos.drop(columns=['fsh','fenwick','xg_fen','xg','g','finishing']).sort_values(['Season','Team'])
650
+ return pos.drop(columns=['fsh','fenwick','xg_fen','xg','g','finishing']).sort_values(['Goalie','Season','Team'])
561
651
 
652
+ elif type =='team':
653
+ pos = agg
654
+ for group in [('OOFF','F'),('ODEF','A')]:
655
+ #Have to set these columns for compatibility with df.apply
656
+ pos['fsh'] = pos[f'Fsh{group[1]}%']
657
+ pos['fenwick'] = pos[f'F{group[1]}/60']
658
+ pos['xg'] = pos[f'xG{group[1]}/60']
659
+ pos['g'] = pos[f'G{group[1]}/60']
660
+ pos['xg_fen'] = pos[f'xG{group[1]}/F{group[1]}']
661
+ pos['finishing'] = pos[f'G{group[1]}/xG{group[1]}']
662
+
663
+ #Find average for position in frame
664
+ avg_fen = pos['fenwick'].mean()
665
+ avg_xg = pos['xg'].mean()
666
+ avg_g = pos['g'].mean()
667
+ avg_fsh = avg_g/avg_fen
668
+ avg_xg_fen = avg_xg/avg_fen
669
+
670
+ #Calculate composite percentiles
671
+ pos[f'{group[0]}-SR'] = pos['fenwick'].rank(pct=True)
672
+ pos[f'{group[0]}-SQ'] = pos['xg_fen'].rank(pct=True)
673
+ pos[f'{group[0]}-FN'] = pos['finishing'].rank(pct=True)
674
+
675
+ #Calculate shot rate, shot quality, and finishing impacts
676
+ pos[f'{group[0]}-SRI'] = pos['g'] - pos.apply(lambda x: goal_comp(avg_fen,x.xg_fen,x.xg,x.g,avg_fsh),axis=1)
677
+ pos[f'{group[0]}-SQI'] = pos['g'] - pos.apply(lambda x: goal_comp(x.fenwick,avg_xg_fen,x.xg,x.g,avg_fsh),axis=1)
678
+ pos[f'{group[0]}-FNI'] = pos['g'] - pos.apply(lambda x: goal_comp(x.fenwick,x.xg_fen,avg_xg,avg_g,avg_fsh),axis=1)
679
+
680
+ #Convert impacts to totals
681
+ #Calculate shot rate, shot quality, and finishing impacts
682
+ pos[f'{group[0]}-SRI-T'] = (pos[f'{group[0]}-SRI']/60)*pos['TOI']
683
+ pos[f'{group[0]}-SQI-T'] = (pos[f'{group[0]}-SQI']/60)*pos['TOI']
684
+ pos[f'{group[0]}-FNI-T'] = (pos[f'{group[0]}-FNI']/60)*pos['TOI']
685
+
686
+ #Rank per 60 stats
687
+ for stat in per_sixty[10:len(per_sixty)]:
688
+ pos[f'{stat}/60-P'] = pos[f'{stat}/60'].rank(pct=True)
689
+
690
+ #Flip percentiles for against stats
691
+ for stat in ['FA','xGA','GA','CA','HA','Give','Penl','Penl2','Penl5']:
692
+ pos[f'{stat}/60-P'] = 1-pos[f'{stat}/60-P']
693
+
694
+ #Add extra metrics
695
+ pos['RushF/60'] = (pos['RushF']/pos['TOI'])*60
696
+ pos['RushA/60'] = (pos['RushA']/pos['TOI'])*60
697
+ pos['RushesFF'] = pos['RushF/60'].rank(pct=True)
698
+ pos['RushesFA'] = 1 - pos['RushA/60'].rank(pct=True)
699
+ pos['RushFxG/60'] = (pos['RushFxG']/pos['TOI'])*60
700
+ pos['RushAxG/60'] = (pos['RushAxG']/pos['TOI'])*60
701
+ pos['RushesxGF'] = pos['RushFxG/60'].rank(pct=True)
702
+ pos['RushesxGA'] = 1 - pos['RushAxG/60'].rank(pct=True)
703
+ pos['RushFG/60'] = (pos['RushFG']/pos['TOI'])*60
704
+ pos['RushAG/60'] = (pos['RushAG']/pos['TOI'])*60
705
+ pos['RushesGF'] = pos['RushFG/60'].rank(pct=True)
706
+ pos['RushesGA'] = 1 - pos['RushAG/60'].rank(pct=True)
707
+
708
+ #Flip against metric percentiles
709
+ pos['ODEF-SR'] = 1-pos['ODEF-SR']
710
+ pos['ODEF-SQ'] = 1-pos['ODEF-SQ']
711
+ pos['ODEF-FN'] = 1-pos['ODEF-FN']
712
+
713
+ pos['EGF'] = pos['OOFF-SRI']+pos['OOFF-SQI']+pos['OOFF-FNI']
714
+ pos['ExGF'] = pos['OOFF-SRI']+pos['OOFF-SQI']
715
+ pos['EGA'] = pos['ODEF-SRI']+pos['ODEF-SQI']+pos['ODEF-FNI']
716
+ pos['ExGA'] = pos['ODEF-SRI']+pos['ODEF-SQI']
717
+
718
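+ #...and their percentiles (NOTE(review): the flip below assigns 1-pos['EGA'] and 1-pos['ExGA'], i.e. the raw values rather than the ranks just computed; 1-pos['EGA-P'] and 1-pos['ExGA-P'] look intended - confirm)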
719
+ pos['EGF-P'] = pos['EGF'].rank(pct=True)
720
+ pos['ExGF-P'] = pos['ExGF'].rank(pct=True)
721
+ pos['EGA-P'] = pos['EGA'].rank(pct=True)
722
+ pos['ExGA-P'] = pos['ExGA'].rank(pct=True)
723
+
724
+ pos['EGA-P'] = 1-pos['EGA']
725
+ pos['ExGA-P'] = 1-pos['ExGA']
726
+
727
+ #...and then their totals
728
+ pos['EGF-T'] = (pos['EGF']/60)*pos['TOI']
729
+ pos['ExGF-T'] = (pos['ExGF']/60)*pos['TOI']
730
+ pos['EGA-T'] = (pos['EGA']/60)*pos['TOI']
731
+ pos['ExGA-T'] = (pos['ExGA']/60)*pos['TOI']
732
+
733
+ #Return: team stats with shooting impacts
734
+ return pos.drop(columns=['fsh','fenwick','xg_fen','xg','g','finishing']).sort_values(['Season','Team'])
562
735
 
563
736
  else:
564
737
  #Remove skaters with less than 150 minutes of TOI then split between forwards and dmen
565
- agg = agg.loc[agg['TOI']>=150]
566
- forwards = agg.loc[agg['Position']!='D']
567
- defensemen = agg.loc[agg['Position']=='D']
738
+ #These are added back in after the fact
739
+ forwards = agg.loc[(agg['Position']!='D')&(agg['TOI']>=150)]
740
+ defensemen = agg.loc[(agg['Position']=='D')&(agg['TOI']>=150)]
741
+ non_players = agg.loc[agg['TOI']<150]
568
742
 
569
743
  #Loop through both positions, all groupings (INDV, OOFF, and ODEF) generating impacts
570
744
  for pos in [forwards,defensemen]:
@@ -594,15 +768,29 @@ def nhl_shooting_impacts(agg,team=False):
594
768
  pos[f'{group[0]}-SQI'] = pos['g'] - pos.apply(lambda x: goal_comp(x.fenwick,avg_xg_fen,x.xg,x.g,avg_fsh),axis=1)
595
769
  pos[f'{group[0]}-FNI'] = pos['g'] - pos.apply(lambda x: goal_comp(x.fenwick,x.xg_fen,avg_xg,avg_g,avg_fsh),axis=1)
596
770
 
771
+ #Convert impacts to totals
772
+ #Calculate shot rate, shot quality, and finishing impacts
773
+ pos[f'{group[0]}-SRI-T'] = (pos[f'{group[0]}-SRI']/60)*pos['TOI']
774
+ pos[f'{group[0]}-SQI-T'] = (pos[f'{group[0]}-SQI']/60)*pos['TOI']
775
+ pos[f'{group[0]}-FNI-T'] = (pos[f'{group[0]}-FNI']/60)*pos['TOI']
776
+
597
777
  #Calculate On-Ice Involvement Percentiles
598
- pos['Fenwick'] = pos['FC%'].rank(pct=True)
599
- pos['xG'] = pos['xGC%'].rank(pct=True)
600
- pos['Goal Factor'] = pos['GI%'].rank(pct=True)
601
- pos['Goal Scoring'] = pos['GC%'].rank(pct=True)
602
- pos['Rush/60'] = (pos['Rush']/pos['TOI'])*60
603
- pos['RushxG/60'] = (pos['Rush xG']/pos['TOI'])*60
604
- pos['Rushes xG'] = pos['RushxG/60'].rank(pct=True)
605
- pos['Rushes FF'] = pos['Rush/60'].rank(pct=True)
778
+ pos['Fi/F'] = pos['FC%'].rank(pct=True)
779
+ pos['xGi/F'] = pos['xGC%'].rank(pct=True)
780
+ pos['Pi/F'] = pos['GI%'].rank(pct=True)
781
+ pos['Gi/F'] = pos['GC%'].rank(pct=True)
782
+ pos['RushFi/60'] = (pos['Rush']/pos['TOI'])*60
783
+ pos['RushxGi/60'] = (pos['Rush xG']/pos['TOI'])*60
784
+ pos['RushesxGi'] = pos['RushxGi/60'].rank(pct=True)
785
+ pos['RushesFi'] = pos['RushFi/60'].rank(pct=True)
786
+
787
+ #Rank per 60 stats
788
+ for stat in per_sixty:
789
+ pos[f'{stat}/60-P'] = pos[f'{stat}/60'].rank(pct=True)
790
+
791
+ #Flip percentiles for against stats
792
+ for stat in ['FA','xGA','GA','CA','HA','Give','Penl','Penl2','Penl5']:
793
+ pos[f'{stat}/60-P'] = 1-pos[f'{stat}/60-P']
606
794
 
607
795
  #Add positions back together
608
796
  complete = pd.concat([forwards,defensemen])
@@ -613,108 +801,253 @@ def nhl_shooting_impacts(agg,team=False):
613
801
  complete['ODEF-FN'] = 1-complete['ODEF-FN']
614
802
 
615
803
  #Extraneous Values
616
- complete['Extraneous Gi'] = complete['INDV-SRI']+complete['INDV-SQI']+complete['INDV-FNI']
617
- complete['Extraneous xGi'] = complete['INDV-SRI']+complete['INDV-SQI']
618
- complete['Extraneous GF'] = complete['OOFF-SRI']+complete['OOFF-SQI']+complete['OOFF-FNI']
619
- complete['Extraneous xGF'] = complete['OOFF-SRI']+complete['OOFF-SQI']
620
- complete['Extraneous GA'] = complete['ODEF-SRI']+complete['ODEF-SQI']+complete['ODEF-FNI']
621
- complete['Extraneous xGA'] = complete['ODEF-SRI']+complete['ODEF-SQI']
622
-
623
- #Goal Composites
624
- complete['Linemate Extraneous Goals'] = complete['Extraneous GF'] - complete['Extraneous Gi']
625
- complete['Linemate Goal Induction'] = complete['Linemate Extraneous Goals']*complete['AC%']
626
- complete['Composite Goal Impact'] = complete['Extraneous Gi'] + complete['Linemate Goal Induction']
627
- complete['Linemate Rel. Goal Impact'] = complete['Composite Goal Impact'] - (complete['Extraneous GF']-complete['Composite Goal Impact'])
628
- complete['Net Goal Impact'] = complete['Extraneous GF'] - complete['Extraneous GA']
629
- complete['Net xGoal Impact'] = complete['Extraneous xGF'] - complete['Extraneous xGA']
630
-
804
+ complete['EGi'] = complete['INDV-SRI']+complete['INDV-SQI']+complete['INDV-FNI']
805
+ complete['ExGi'] = complete['INDV-SRI']+complete['INDV-SQI']
806
+ complete['EGF'] = complete['OOFF-SRI']+complete['OOFF-SQI']+complete['OOFF-FNI']
807
+ complete['ExGF'] = complete['OOFF-SRI']+complete['OOFF-SQI']
808
+ complete['EGA'] = complete['ODEF-SRI']+complete['ODEF-SQI']+complete['ODEF-FNI']
809
+ complete['ExGA'] = complete['ODEF-SRI']+complete['ODEF-SQI']
810
+
811
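+ #...and their percentiles (NOTE(review): the flip below assigns 1-complete['EGA'] and 1-complete['ExGA'], i.e. the raw values rather than the ranks just computed; 1-complete['EGA-P'] and 1-complete['ExGA-P'] look intended - confirm)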
812
+ complete['EGi-P'] = complete['EGi'].rank(pct=True)
813
+ complete['ExGi-P'] = complete['ExGi'].rank(pct=True)
814
+ complete['EGF-P'] = complete['EGF'].rank(pct=True)
815
+ complete['ExGF-P'] = complete['ExGF'].rank(pct=True)
816
+ complete['EGA-P'] = complete['EGA'].rank(pct=True)
817
+ complete['ExGA-P'] = complete['ExGA'].rank(pct=True)
818
+
819
+ complete['EGA-P'] = 1-complete['EGA']
820
+ complete['ExGA-P'] = 1-complete['ExGA']
821
+
822
+ #...and then their totals
823
+ complete['EGi-T'] = (complete['EGi']/60)*complete['TOI']
824
+ complete['ExGi-T'] = (complete['ExGi']/60)*complete['TOI']
825
+ complete['EGF-T'] = (complete['EGF']/60)*complete['TOI']
826
+ complete['ExGF-T'] = (complete['ExGF']/60)*complete['TOI']
827
+ complete['EGA-T'] = (complete['EGA']/60)*complete['TOI']
828
+ complete['ExGA-T'] = (complete['ExGA']/60)*complete['TOI']
829
+
830
+ #Goal Composites...
831
+ complete['LiEG'] = complete['EGF'] - complete['EGi']
832
+ complete['LiExG'] = complete['ExGF'] - complete['ExGi']
833
+ complete['LiGIn'] = complete['LiEG']*complete['AC%']
834
+ complete['LixGIn'] = complete['LiExG']*complete['AC%']
835
+ complete['ALiGIn'] = complete['LiGIn']-complete['LixGIn']
836
+ complete['CompGI'] = complete['EGi'] + complete['LiGIn']
837
+ complete['LiRelGI'] = complete['CompGI'] - (complete['EGF']-complete['CompGI'])
838
+ complete['NetGI'] = complete['EGF'] - complete['EGA']
839
+ complete['NetxGI'] = complete['ExGF'] - complete['ExGA']
840
+
841
+ #...and their percentiles
842
+ complete['LiEG-P'] = complete['LiEG'].rank(pct=True)
843
+ complete['LiExG-P'] = complete['LiExG'].rank(pct=True)
844
+ complete['LiGIn-P'] = complete['LiGIn'].rank(pct=True)
845
+ complete['LixGIn-P'] = complete['LixGIn'].rank(pct=True)
846
+ complete['ALiGIn-P'] = complete['ALiGIn'].rank(pct=True)
847
+ complete['CompGI-P'] = complete['CompGI'].rank(pct=True)
848
+ complete['LiRelGI-P'] = complete['LiRelGI'].rank(pct=True)
849
+ complete['NetGI-P'] = complete['NetGI'].rank(pct=True)
850
+ complete['NetxGI-P'] = complete['NetxGI'].rank(pct=True)
851
+
852
+ #..and then their totals
853
+ complete['LiEG-T'] = (complete['LiEG']/60)*complete['TOI']
854
+ complete['LiExG-T'] = (complete['LiExG']/60)*complete['TOI']
855
+ complete['LiGIn-T'] = (complete['LiGIn']/60)*complete['TOI']
856
+ complete['LixGIn-T'] = (complete['LixGIn']/60)*complete['TOI']
857
+ complete['ALiGIn-T'] = (complete['ALiGIn']/60)*complete['TOI']
858
+ complete['CompGI-T'] = (complete['CompGI']/60)*complete['TOI']
859
+ complete['LiRelGI-T'] = (complete['LiRelGI']/60)*complete['TOI']
860
+ complete['NetGI-T'] = (complete['NetGI']/60)*complete['TOI']
861
+ complete['NetxGI-T'] = (complete['NetxGI']/60)*complete['TOI']
862
+
863
+ #Add back skaters with less than 150 minutes TOI
864
+ df = pd.concat([complete,non_players]).drop(columns=['fsh','fenwick','xg_fen','xg','g','finishing']).sort_values(['Player','Season','Team','ID'])
631
865
  #Return: skater stats with shooting impacts
632
- return complete.drop(columns=['fsh','fenwick','xg_fen','xg','g','finishing']).sort_values(['Player','Season','Team','ID'])
866
+ return df
633
867
 
634
- def nhl_calculate_stats(pbp,type,season_types,game_strength,roster_path="rosters/nhl_rosters.csv",xg="moneypuck",shot_impact=False):
868
+ def nhl_calculate_stats(pbp,type,season_types,game_strength,split_game=False,roster_path=default_roster,shot_impact=False):
635
869
  #Given play-by-play, seasonal information, game_strength, and rosters, return aggregated stats (the WSBA xG model is applied when the play-by-play has no 'xG' column)
636
870
  # param 'pbp' - play-by-play dataframe
637
- # param 'type' - type of stats to calculate ('skater', 'goaltender', or 'team')
871
+ # param 'type' - type of stats to calculate ('skater', 'goalie', or 'team')
638
872
  # param 'season' - season or timeframe of events in play-by-play
639
873
  # param 'season_types' - list of season types (preseason, regular season, or playoffs) to include in aggregation
640
874
  # param 'game_strength' - list of game_strengths to include in aggregation
875
+ # param 'split_game' - boolean which if true groups aggregation by game
641
876
  # param 'roster_path' - path to roster file
642
- # param 'xg' - xG model to apply to pbp for aggregation
643
877
  # param 'shot_impact' - boolean determining if the shot impact model will be applied to the dataset
644
878
 
645
- print(f"Calculating statistics for all games in the provided play-by-play data for {type}s...\nSeasons included: {pbp['season'].drop_duplicates().to_list()}...")
879
+ print(f"Calculating statistics for all games in the provided play-by-play data at {game_strength} for {type}s...\nSeasons included: {pbp['season'].drop_duplicates().to_list()}...")
646
880
  start = time.perf_counter()
647
881
 
648
- #Add extra data and apply team changes
649
- pbp = prep_xG_data(pbp).replace(convert_team_abbr)
650
-
651
882
  #Check if xG column exists and apply model if it does not
652
883
  try:
653
884
  pbp['xG']
654
- except KeyError:
655
- if xg == 'wsba':
656
- pbp = wsba_xG(pbp)
657
- else:
658
- pbp = moneypuck_xG(pbp)
885
+ except KeyError:
886
+ pbp = wsba_xG(pbp)
659
887
 
660
- #Filter by season types and remove shootouts
661
- pbp = pbp.loc[(pbp['season_type'].isin(season_types)) & (pbp['period'] < 5)]
888
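+ #Filter by season types and remove shots on empty nets; shots with no coordinates are also removed unless game_strength is 'all' (NOTE(review): no shootout filter is applied in this step - confirm it is handled elsewhere)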
889
+ pbp_noshot = pbp.loc[(pbp['season_type'].isin(season_types)) & ~(pbp['event_type'].isin(fenwick_events))]
890
+
891
+ #Include everything when strengths is set to 'all'
892
+ if game_strength == 'all':
893
+ mask = ((pbp['event_type'].isin(fenwick_events)) & (pbp['empty_net']<1))
894
+ else:
895
+ mask = ((pbp['event_type'].isin(fenwick_events)) & (pbp['empty_net']<1) & (pbp['x'].notna()) & (pbp['y'].notna()))
662
896
 
897
+ pbp_shot = pbp.loc[(pbp['season_type'].isin(season_types)) & mask]
898
+
899
+ pbp = pd.concat([pbp_shot,pbp_noshot])
900
+
663
901
  #Convert all columns with player ids to float in order to avoid merging errors
664
902
  for col in get_col():
665
903
  if "_id" in col:
666
904
  try: pbp[col] = pbp[col].astype(float)
667
905
  except KeyError: continue
668
906
 
669
- # Filter by game strength if not "all"
670
- if game_strength != "all":
671
- pbp = pbp.loc[pbp['strength_state'].isin(game_strength)]
907
+ #Split by game if specified
908
+ if split_game:
909
+ second_group = ['season','game_id']
910
+ else:
911
+ second_group = ['season']
672
912
 
673
913
  #Split calculation
674
- if type == 'team':
675
- complete = calc_team(pbp)
676
-
677
- #WSBA
678
- complete['WSBA'] = complete['Team']+complete['Season'].astype(str)
914
+ if type == 'goalie':
915
+ complete = calc_goalie(pbp,game_strength,second_group)
679
916
 
680
917
  #Set TOI to minute
681
918
  complete['TOI'] = complete['TOI']/60
682
919
 
683
920
  #Add per 60 stats
684
- for stat in per_sixty[7:13]:
921
+ for stat in ['FF','FA','xGF','xGA','GF','GA','CF','CA','GSAx']:
685
922
  complete[f'{stat}/60'] = (complete[stat]/complete['TOI'])*60
923
+
924
+ complete['GF%'] = complete['GF']/(complete['GF']+complete['GA'])
925
+ complete['xGF%'] = complete['xGF']/(complete['xGF']+complete['xGA'])
926
+ complete['FF%'] = complete['FF']/(complete['FF']+complete['FA'])
927
+ complete['CF%'] = complete['CF']/(complete['CF']+complete['CA'])
686
928
 
687
- #Rank per 60 stats
688
- for stat in per_sixty[7:13]:
689
- complete[f'{stat}/60 Percentile'] = complete[f'{stat}/60'].rank(pct=True)
929
+ #Remove entries with no ID listed
930
+ complete = complete.loc[complete['ID'].notna()]
690
931
 
691
- #Flip percentiles for against stats
692
- for stat in ['FA','xGA','GA']:
693
- complete[f'{stat}/60 Percentile'] = 1-complete[f'{stat}/60 Percentile']
932
+ #Import rosters and player info
933
+ rosters = pd.read_csv(roster_path)
934
+ names = rosters[['id','fullName',
935
+ 'headshot','positionCode','shootsCatches',
936
+ 'heightInInches','weightInPounds',
937
+ 'birthDate','birthCountry']].drop_duplicates(subset=['id','fullName'],keep='last')
938
+
939
+ #Add names
940
+ complete = pd.merge(complete,names,how='left',left_on='ID',right_on='id')
941
+
942
+ #Rename if there are no missing names
943
+ complete = complete.rename(columns={'fullName':'Goalie',
944
+ 'headshot':'Headshot',
945
+ 'positionCode':'Position',
946
+ 'shootsCatches':'Handedness',
947
+ 'heightInInches':'Height (in)',
948
+ 'weightInPounds':'Weight (lbs)',
949
+ 'birthDate':'Birthday',
950
+ 'birthCountry':'Nationality'})
951
+
952
+ #WSBA
953
+ complete['WSBA'] = complete['Goalie']+complete['Team']+complete['Season'].astype(str)
954
+
955
+ #Add player age
956
+ complete['Birthday'] = pd.to_datetime(complete['Birthday'])
957
+ complete['season_year'] = complete['Season'].astype(str).str[4:8].astype(int)
958
+ complete['Age'] = complete['season_year'] - complete['Birthday'].dt.year
959
+
960
+ #Find player headshot
961
+ complete['Headshot'] = 'https://assets.nhle.com/mugs/nhl/'+complete['Season'].astype(str)+'/'+complete['Team']+'/'+complete['ID'].astype(int).astype(str)+'.png'
694
962
 
695
963
  end = time.perf_counter()
696
964
  length = end-start
697
965
  print(f'...finished in {(length if length <60 else length/60):.2f} {'seconds' if length <60 else 'minutes'}.')
698
- #Apply shot impacts if necessary (Note: this will remove skaters with fewer than 150 minutes of TOI due to the shot impact TOI rule)
966
+
967
+ head = ['Goalie','ID','Game'] if 'Game' in complete.columns else ['Goalie','ID']
968
+ complete = complete[head+[
969
+ "Season","Team",'WSBA',
970
+ 'Headshot','Position','Handedness',
971
+ 'Height (in)','Weight (lbs)',
972
+ 'Birthday','Age','Nationality',
973
+ 'GP','TOI',
974
+ "GF","FF","xGF","xGF/FF","GF/xGF","FshF%",
975
+ "GA","FA","xGA","xGA/FA","GA/xGA","FshA%",
976
+ 'CF','CA',
977
+ 'GSAx',
978
+ 'RushF','RushA','RushFxG','RushAxG','RushFG','RushAG'
979
+ ]+[f'{stat}/60' for stat in ['FF','FA','xGF','xGA','GF','GA','CF','CA','GSAx']]]
980
+
981
+ #Apply shot impacts if necessary
699
982
  if shot_impact:
700
- return nhl_shooting_impacts(complete,True)
701
- else:
702
- return complete
983
+ complete = nhl_shooting_impacts(complete,'goalie')
984
+
985
+ end = time.perf_counter()
986
+ length = end-start
987
+ print(f'...finished in {(length if length <60 else length/60):.2f} {'seconds' if length <60 else 'minutes'}.')
988
+
989
+ return complete
990
+
991
+ elif type == 'team':
992
+ complete = calc_team(pbp,game_strength,second_group)
993
+
994
+ #WSBA
995
+ complete['WSBA'] = complete['Team']+complete['Season'].astype(str)
996
+
997
+ #Set TOI to minute
998
+ complete['TOI'] = complete['TOI']/60
999
+
1000
+ #Add per 60 stats
1001
+ for stat in per_sixty[10:len(per_sixty)]:
1002
+ complete[f'{stat}/60'] = (complete[stat]/complete['TOI'])*60
1003
+
1004
+ complete['GF%'] = complete['GF']/(complete['GF']+complete['GA'])
1005
+ complete['xGF%'] = complete['xGF']/(complete['xGF']+complete['xGA'])
1006
+ complete['FF%'] = complete['FF']/(complete['FF']+complete['FA'])
1007
+ complete['CF%'] = complete['CF']/(complete['CF']+complete['CA'])
1008
+
1009
+ head = ['Team','Game'] if 'Game' in complete.columns else ['Team']
1010
+ complete = complete[head+[
1011
+ 'Season','WSBA',
1012
+ 'GP','TOI',
1013
+ "GF","FF","xGF","xGF/FF","GF/xGF","FshF%",
1014
+ "GA","FA","xGA","xGA/FA","GA/xGA","FshA%",
1015
+ 'CF','CA',
1016
+ 'GF%','FF%','xGF%','CF%',
1017
+ 'HF','HA','HF%',
1018
+ 'Penl','Penl2','Penl5','PIM','Draw','PENL%',
1019
+ 'Give','Take','PM%',
1020
+ 'Block',
1021
+ 'RushF','RushA','RushFxG','RushAxG','RushFG','RushAG'
1022
+ ]+[f'{stat}/60' for stat in per_sixty[10:len(per_sixty)]]]
1023
+ #Apply shot impacts if necessary
1024
+ if shot_impact:
1025
+ complete = nhl_shooting_impacts(complete,'team')
1026
+
1027
+ end = time.perf_counter()
1028
+ length = end-start
1029
+ print(f'...finished in {(length if length <60 else length/60):.2f} {'seconds' if length <60 else 'minutes'}.')
1030
+
1031
+ return complete
703
1032
  else:
704
- indv_stats = calc_indv(pbp)
705
- onice_stats = calc_onice(pbp)
1033
+ indv_stats = calc_indv(pbp,game_strength,second_group)
1034
+ onice_stats = calc_onice(pbp,game_strength,second_group)
706
1035
 
707
1036
  #IDs sometimes set as objects
708
1037
  indv_stats['ID'] = indv_stats['ID'].astype(float)
709
1038
  onice_stats['ID'] = onice_stats['ID'].astype(float)
710
1039
 
711
1040
  #Merge and add columns for extra stats
712
- complete = pd.merge(indv_stats,onice_stats,how="outer",on=['ID','Team','Season'])
1041
+ complete = pd.merge(indv_stats,onice_stats,how="outer",on=['ID','Team','Season']+(['Game'] if 'game_id' in second_group else []))
713
1042
  complete['GC%'] = complete['Gi']/complete['GF']
714
1043
  complete['AC%'] = (complete['A1']+complete['A2'])/complete['GF']
715
1044
  complete['GI%'] = (complete['Gi']+complete['A1']+complete['A2'])/complete['GF']
716
1045
  complete['FC%'] = complete['Fi']/complete['FF']
717
1046
  complete['xGC%'] = complete['xGi']/complete['xGF']
1047
+ complete['GF%'] = complete['GF']/(complete['GF']+complete['GA'])
1048
+ complete['xGF%'] = complete['xGF']/(complete['xGF']+complete['xGA'])
1049
+ complete['FF%'] = complete['FF']/(complete['FF']+complete['FA'])
1050
+ complete['CF%'] = complete['CF']/(complete['CF']+complete['CA'])
718
1051
 
719
1052
  #Remove entries with no ID listed
720
1053
  complete = complete.loc[complete['ID'].notna()]
@@ -742,18 +1075,6 @@ def nhl_calculate_stats(pbp,type,season_types,game_strength,roster_path="rosters
742
1075
  #Set TOI to minute
743
1076
  complete['TOI'] = complete['TOI']/60
744
1077
 
745
- #Add per 60 stats
746
- for stat in per_sixty:
747
- complete[f'{stat}/60'] = (complete[stat]/complete['TOI'])*60
748
-
749
- #Rank per 60 stats
750
- for stat in per_sixty:
751
- complete[f'{stat}/60 Percentile'] = complete[f'{stat}/60'].rank(pct=True)
752
-
753
- #Flip percentiles for against stats
754
- for stat in ['FA','xGA','GA']:
755
- complete[f'{stat}/60 Percentile'] = 1-complete[f'{stat}/60 Percentile']
756
-
757
1078
  #Add player age
758
1079
  complete['Birthday'] = pd.to_datetime(complete['Birthday'])
759
1080
  complete['season_year'] = complete['Season'].astype(str).str[4:8].astype(int)
@@ -762,42 +1083,56 @@ def nhl_calculate_stats(pbp,type,season_types,game_strength,roster_path="rosters
762
1083
  #Find player headshot
763
1084
  complete['Headshot'] = 'https://assets.nhle.com/mugs/nhl/'+complete['Season'].astype(str)+'/'+complete['Team']+'/'+complete['ID'].astype(int).astype(str)+'.png'
764
1085
 
765
- end = time.perf_counter()
766
- length = end-start
767
1086
  #Remove goalies that occasionally appear in a set
768
1087
  complete = complete.loc[complete['Position']!='G']
769
1088
  #Add WSBA ID
770
1089
  complete['WSBA'] = complete['Player']+complete['Season'].astype(str)+complete['Team']
771
1090
 
1091
+ #Add per 60 stats
1092
+ for stat in per_sixty:
1093
+ complete[f'{stat}/60'] = (complete[stat]/complete['TOI'])*60
1094
+
772
1095
  #Shot Type Metrics
773
1096
  type_metrics = []
774
1097
  for type in shot_types:
775
1098
  for stat in per_sixty[:3]:
776
1099
  type_metrics.append(f'{type.capitalize()}{stat}')
777
1100
 
778
- complete = complete[[
779
- 'Player','ID',
1101
+ head = ['Player','ID','Game'] if 'Game' in complete.columns else ['Player','ID']
1102
+ complete = complete[head+[
780
1103
  "Season","Team",'WSBA',
781
1104
  'Headshot','Position','Handedness',
782
1105
  'Height (in)','Weight (lbs)',
783
1106
  'Birthday','Age','Nationality',
784
1107
  'GP','TOI',
785
1108
  "Gi","A1","A2",'P1','P',
1109
+ 'Give','Take','PM%','HF','HA','HF%',
786
1110
  "Fi","xGi",'xGi/Fi',"Gi/xGi","Fshi%",
787
1111
  "GF","FF","xGF","xGF/FF","GF/xGF","FshF%",
788
1112
  "GA","FA","xGA","xGA/FA","GA/xGA","FshA%",
1113
+ 'Ci','CF','CA','CF%',
1114
+ 'FF%','xGF%','GF%',
789
1115
  'Rush',"Rush xG",'Rush G',"GC%","AC%","GI%","FC%","xGC%",
790
- ]+[f'{stat}/60' for stat in per_sixty]+[f'{stat}/60 Percentile' for stat in per_sixty]+type_metrics].fillna(0).sort_values(['Player','Season','Team','ID'])
1116
+ 'F','FW','FL','F%',
1117
+ 'Penl','Penl2','Penl5',
1118
+ 'Draw','PIM','PENL%',
1119
+ 'Block',
1120
+ 'OZF','NZF','DZF',
1121
+ 'OZF%','NZF%','DZF%',
1122
+ ]+[f'{stat}/60' for stat in per_sixty]+type_metrics].fillna(0).sort_values(['Player','Season','Team','ID'])
791
1123
 
792
- print(f'...finished in {(length if length <60 else length/60):.2f} {'seconds' if length <60 else 'minutes'}.')
793
1124
  #Apply shot impacts if necessary (Note: skaters with fewer than 150 minutes of TOI are excluded from the impact calculations and appended back to the output afterwards)
794
1125
  if shot_impact:
795
- return nhl_shooting_impacts(complete,False)
796
- else:
797
- return complete
1126
+ complete = nhl_shooting_impacts(complete,'skater')
1127
+
1128
+ end = time.perf_counter()
1129
+ length = end-start
1130
+ print(f'...finished in {(length if length <60 else length/60):.2f} {'seconds' if length <60 else 'minutes'}.')
798
1131
 
799
- def nhl_plot_skaters_shots(pbp,skater_dict,strengths,marker_dict=event_markers,onice = 'indv',title = True,legend=False,xg='moneypuck'):
800
- #Returns list of plots for specified skaters
1132
+ return complete
1133
+
1134
+ def nhl_plot_skaters_shots(pbp,skater_dict,strengths,marker_dict=event_markers,onice = 'indv',title = True,legend=False):
1135
+ #Returns dict of plots for specified skaters
801
1136
  # param 'pbp' - pbp to plot data
802
1137
  # param 'skater_dict' - skaters to plot shots for (format: {'Patrice Bergeron':['20242025','BOS']})
803
1138
  # param 'strengths' - strengths to include in plotting
@@ -809,18 +1144,19 @@ def nhl_plot_skaters_shots(pbp,skater_dict,strengths,marker_dict=event_markers,o
809
1144
 
810
1145
  print(f'Plotting the following skater shots: {skater_dict}...')
811
1146
 
812
- #Iterate through games, adding plot to list
813
- skater_plots = []
1147
+ #Iterate through skaters, adding plots to dict
1148
+ skater_plots = {}
814
1149
  for skater in skater_dict.keys():
815
1150
  skater_info = skater_dict[skater]
816
1151
  title = f'{skater} Fenwick Shots for {skater_info[1]} in {skater_info[0][2:4]}-{skater_info[0][6:8]}' if title else ''
817
- skater_plots.append(plot_skater_shots(pbp,skater,skater_info[0],skater_info[1],strengths,title,marker_dict,onice,legend,xg))
1152
+ #Key is formatted as PLAYERSEASONTEAM (i.e. PATRICE BERGERON20212022BOS)
1153
+ skater_plots.update({f'{skater}{skater_info[0]}{skater_info[1]}':[plot_skater_shots(pbp,skater,skater_info[0],skater_info[1],strengths,title,marker_dict,onice,legend)]})
818
1154
 
819
1155
  #Return: dict of plotted skater shot charts (keyed by PLAYERSEASONTEAM)
820
1156
  return skater_plots
821
1157
 
822
- def nhl_plot_games(pbp,events,strengths,game_ids='all',marker_dict=event_markers,team_colors={'away':'primary','home':'primary'},legend=False,xg='moneypuck'):
823
- #Returns list of plots for specified games
1158
+ def nhl_plot_games(pbp,events,strengths,game_ids='all',marker_dict=event_markers,team_colors={'away':'primary','home':'primary'},legend=False):
1159
+ #Returns dict of plots for specified games
824
1160
  # param 'pbp' - pbp to plot data
825
1161
  # param 'events' - type of events to plot
826
1162
  # param 'strengths' - strengths to include in plotting
@@ -835,8 +1171,10 @@ def nhl_plot_games(pbp,events,strengths,game_ids='all',marker_dict=event_markers
835
1171
 
836
1172
  print(f'Plotting the following games: {game_ids}...')
837
1173
 
838
- #Iterate through games, adding plot to list
839
- game_plots = [plot_game_events(pbp,game,events,strengths,marker_dict,team_colors,legend,xg) for game in game_ids]
1174
+ game_plots = {}
1175
+ #Iterate through games, adding plot to dict
1176
+ for game in game_ids:
1177
+ game_plots.update({game:[plot_game_events(pbp,game,events,strengths,marker_dict,team_colors,legend)]})
840
1178
 
841
1179
  #Return: dict of plotted game events (keyed by game id)
842
1180
  return game_plots
@@ -845,7 +1183,7 @@ def repo_load_rosters(seasons = []):
845
1183
  #Returns roster data from repository
846
1184
  # param 'seasons' - list of seasons to include
847
1185
 
848
- data = pd.read_csv("rosters/nhl_rosters.csv")
1186
+ data = pd.read_csv(default_roster)
849
1187
  if len(seasons)>0:
850
1188
  data = data.loc[data['season'].isin(seasons)]
851
1189
 
@@ -855,7 +1193,7 @@ def repo_load_schedule(seasons = []):
855
1193
  #Returns schedule data from repository
856
1194
  # param 'seasons' - list of seasons to include
857
1195
 
858
- data = pd.read_csv("schedule/schedule.csv")
1196
+ data = pd.read_csv(schedule_path)
859
1197
  if len(seasons)>0:
860
1198
  data = data.loc[data['season'].isin(seasons)]
861
1199
 
@@ -864,7 +1202,7 @@ def repo_load_schedule(seasons = []):
864
1202
  def repo_load_teaminfo():
865
1203
  #Returns team data from repository
866
1204
 
867
- return pd.read_csv("teaminfo/nhl_teaminfo.csv")
1205
+ return pd.read_csv(info_path)
868
1206
 
869
1207
  def repo_load_pbp(seasons = []):
870
1208
  #Returns play-by-play data from repository
@@ -872,7 +1210,7 @@ def repo_load_pbp(seasons = []):
872
1210
 
873
1211
  #Add parquet to total
874
1212
  print(f'Loading play-by-play from the following seasons: {seasons}...')
875
- dfs = [pd.read_parquet(f"https://github.com/owensingh38/wsba_hockey/raw/refs/heads/main/src/wsba_hockey/pbp/parquet/nhl_pbp_{season}.parquet") for season in seasons]
1213
+ dfs = [pd.read_parquet(f"https://f005.backblazeb2.com/file/weakside-breakout/pbp/{season}.parquet") for season in seasons]
876
1214
 
877
1215
  return pd.concat(dfs)
878
1216
 
@@ -880,9 +1218,3 @@ def repo_load_seasons():
880
1218
  #List of available seasons to scrape
881
1219
 
882
1220
  return seasons
883
-
884
- def admin_convert_to_parquet(seasons):
885
- for season in seasons:
886
- load = pd.read_csv(f'pbp/csv/nhl_pbp_{season}.csv')
887
-
888
- load.to_parquet(f'pbp/parquet/nhl_pbp_{season}.parquet',index=False)