wsba-hockey 1.1.9__py3-none-any.whl → 1.2.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (144) hide show
  1. wsba_hockey/tools/scraping.py +146 -170
  2. wsba_hockey/tools/utils/__init__.py +0 -1
  3. wsba_hockey/tools/utils/shared.py +14 -389
  4. wsba_hockey/tools/xg_model.py +6 -1
  5. wsba_hockey/wsba_main.py +45 -10
  6. {wsba_hockey-1.1.9.dist-info → wsba_hockey-1.2.0.dist-info}/METADATA +16 -15
  7. wsba_hockey-1.2.0.dist-info/RECORD +15 -0
  8. wsba_hockey/api/api/index.py +0 -162
  9. wsba_hockey/data_pipelines.py +0 -247
  10. wsba_hockey/evidence/weakside-breakout/node_modules/duckdb/vendor.py +0 -146
  11. wsba_hockey/evidence/weakside-breakout/node_modules/flatted/python/flatted.py +0 -149
  12. wsba_hockey/evidence/weakside-breakout/node_modules/flatted/python/test.py +0 -63
  13. wsba_hockey/evidence/weakside-breakout/node_modules/node-gyp/gyp/gyp_main.py +0 -45
  14. wsba_hockey/evidence/weakside-breakout/node_modules/node-gyp/gyp/pylib/gyp/MSVSNew.py +0 -367
  15. wsba_hockey/evidence/weakside-breakout/node_modules/node-gyp/gyp/pylib/gyp/MSVSProject.py +0 -206
  16. wsba_hockey/evidence/weakside-breakout/node_modules/node-gyp/gyp/pylib/gyp/MSVSSettings.py +0 -1270
  17. wsba_hockey/evidence/weakside-breakout/node_modules/node-gyp/gyp/pylib/gyp/MSVSSettings_test.py +0 -1547
  18. wsba_hockey/evidence/weakside-breakout/node_modules/node-gyp/gyp/pylib/gyp/MSVSToolFile.py +0 -59
  19. wsba_hockey/evidence/weakside-breakout/node_modules/node-gyp/gyp/pylib/gyp/MSVSUserFile.py +0 -153
  20. wsba_hockey/evidence/weakside-breakout/node_modules/node-gyp/gyp/pylib/gyp/MSVSUtil.py +0 -271
  21. wsba_hockey/evidence/weakside-breakout/node_modules/node-gyp/gyp/pylib/gyp/MSVSVersion.py +0 -574
  22. wsba_hockey/evidence/weakside-breakout/node_modules/node-gyp/gyp/pylib/gyp/__init__.py +0 -690
  23. wsba_hockey/evidence/weakside-breakout/node_modules/node-gyp/gyp/pylib/gyp/common.py +0 -661
  24. wsba_hockey/evidence/weakside-breakout/node_modules/node-gyp/gyp/pylib/gyp/common_test.py +0 -78
  25. wsba_hockey/evidence/weakside-breakout/node_modules/node-gyp/gyp/pylib/gyp/easy_xml.py +0 -165
  26. wsba_hockey/evidence/weakside-breakout/node_modules/node-gyp/gyp/pylib/gyp/easy_xml_test.py +0 -109
  27. wsba_hockey/evidence/weakside-breakout/node_modules/node-gyp/gyp/pylib/gyp/flock_tool.py +0 -55
  28. wsba_hockey/evidence/weakside-breakout/node_modules/node-gyp/gyp/pylib/gyp/generator/__init__.py +0 -0
  29. wsba_hockey/evidence/weakside-breakout/node_modules/node-gyp/gyp/pylib/gyp/generator/analyzer.py +0 -808
  30. wsba_hockey/evidence/weakside-breakout/node_modules/node-gyp/gyp/pylib/gyp/generator/android.py +0 -1173
  31. wsba_hockey/evidence/weakside-breakout/node_modules/node-gyp/gyp/pylib/gyp/generator/cmake.py +0 -1321
  32. wsba_hockey/evidence/weakside-breakout/node_modules/node-gyp/gyp/pylib/gyp/generator/compile_commands_json.py +0 -120
  33. wsba_hockey/evidence/weakside-breakout/node_modules/node-gyp/gyp/pylib/gyp/generator/dump_dependency_json.py +0 -103
  34. wsba_hockey/evidence/weakside-breakout/node_modules/node-gyp/gyp/pylib/gyp/generator/eclipse.py +0 -464
  35. wsba_hockey/evidence/weakside-breakout/node_modules/node-gyp/gyp/pylib/gyp/generator/gypd.py +0 -89
  36. wsba_hockey/evidence/weakside-breakout/node_modules/node-gyp/gyp/pylib/gyp/generator/gypsh.py +0 -58
  37. wsba_hockey/evidence/weakside-breakout/node_modules/node-gyp/gyp/pylib/gyp/generator/make.py +0 -2714
  38. wsba_hockey/evidence/weakside-breakout/node_modules/node-gyp/gyp/pylib/gyp/generator/msvs.py +0 -3981
  39. wsba_hockey/evidence/weakside-breakout/node_modules/node-gyp/gyp/pylib/gyp/generator/msvs_test.py +0 -44
  40. wsba_hockey/evidence/weakside-breakout/node_modules/node-gyp/gyp/pylib/gyp/generator/ninja.py +0 -2936
  41. wsba_hockey/evidence/weakside-breakout/node_modules/node-gyp/gyp/pylib/gyp/generator/ninja_test.py +0 -55
  42. wsba_hockey/evidence/weakside-breakout/node_modules/node-gyp/gyp/pylib/gyp/generator/xcode.py +0 -1394
  43. wsba_hockey/evidence/weakside-breakout/node_modules/node-gyp/gyp/pylib/gyp/generator/xcode_test.py +0 -25
  44. wsba_hockey/evidence/weakside-breakout/node_modules/node-gyp/gyp/pylib/gyp/input.py +0 -3130
  45. wsba_hockey/evidence/weakside-breakout/node_modules/node-gyp/gyp/pylib/gyp/input_test.py +0 -98
  46. wsba_hockey/evidence/weakside-breakout/node_modules/node-gyp/gyp/pylib/gyp/mac_tool.py +0 -771
  47. wsba_hockey/evidence/weakside-breakout/node_modules/node-gyp/gyp/pylib/gyp/msvs_emulation.py +0 -1271
  48. wsba_hockey/evidence/weakside-breakout/node_modules/node-gyp/gyp/pylib/gyp/ninja_syntax.py +0 -174
  49. wsba_hockey/evidence/weakside-breakout/node_modules/node-gyp/gyp/pylib/gyp/simple_copy.py +0 -61
  50. wsba_hockey/evidence/weakside-breakout/node_modules/node-gyp/gyp/pylib/gyp/win_tool.py +0 -374
  51. wsba_hockey/evidence/weakside-breakout/node_modules/node-gyp/gyp/pylib/gyp/xcode_emulation.py +0 -1939
  52. wsba_hockey/evidence/weakside-breakout/node_modules/node-gyp/gyp/pylib/gyp/xcode_ninja.py +0 -302
  53. wsba_hockey/evidence/weakside-breakout/node_modules/node-gyp/gyp/pylib/gyp/xcodeproj_file.py +0 -3197
  54. wsba_hockey/evidence/weakside-breakout/node_modules/node-gyp/gyp/pylib/gyp/xml_fix.py +0 -65
  55. wsba_hockey/evidence/weakside-breakout/node_modules/node-gyp/gyp/test_gyp.py +0 -261
  56. wsba_hockey/evidence/weakside-breakout/node_modules/node-gyp/gyp/tools/graphviz.py +0 -102
  57. wsba_hockey/evidence/weakside-breakout/node_modules/node-gyp/gyp/tools/pretty_gyp.py +0 -156
  58. wsba_hockey/evidence/weakside-breakout/node_modules/node-gyp/gyp/tools/pretty_sln.py +0 -181
  59. wsba_hockey/evidence/weakside-breakout/node_modules/node-gyp/gyp/tools/pretty_vcproj.py +0 -339
  60. wsba_hockey/evidence/weakside-breakout/node_modules/node-gyp/test/fixtures/test-charmap.py +0 -31
  61. wsba_hockey/evidence/weakside-breakout/node_modules/node-gyp/update-gyp.py +0 -64
  62. wsba_hockey/evidence/weakside-breakout/node_modules/sqlite3/node_modules/node-gyp/gyp/gyp_main.py +0 -45
  63. wsba_hockey/evidence/weakside-breakout/node_modules/sqlite3/node_modules/node-gyp/gyp/pylib/gyp/MSVSNew.py +0 -367
  64. wsba_hockey/evidence/weakside-breakout/node_modules/sqlite3/node_modules/node-gyp/gyp/pylib/gyp/MSVSProject.py +0 -206
  65. wsba_hockey/evidence/weakside-breakout/node_modules/sqlite3/node_modules/node-gyp/gyp/pylib/gyp/MSVSSettings.py +0 -1270
  66. wsba_hockey/evidence/weakside-breakout/node_modules/sqlite3/node_modules/node-gyp/gyp/pylib/gyp/MSVSSettings_test.py +0 -1547
  67. wsba_hockey/evidence/weakside-breakout/node_modules/sqlite3/node_modules/node-gyp/gyp/pylib/gyp/MSVSToolFile.py +0 -59
  68. wsba_hockey/evidence/weakside-breakout/node_modules/sqlite3/node_modules/node-gyp/gyp/pylib/gyp/MSVSUserFile.py +0 -153
  69. wsba_hockey/evidence/weakside-breakout/node_modules/sqlite3/node_modules/node-gyp/gyp/pylib/gyp/MSVSUtil.py +0 -271
  70. wsba_hockey/evidence/weakside-breakout/node_modules/sqlite3/node_modules/node-gyp/gyp/pylib/gyp/MSVSVersion.py +0 -574
  71. wsba_hockey/evidence/weakside-breakout/node_modules/sqlite3/node_modules/node-gyp/gyp/pylib/gyp/__init__.py +0 -666
  72. wsba_hockey/evidence/weakside-breakout/node_modules/sqlite3/node_modules/node-gyp/gyp/pylib/gyp/common.py +0 -654
  73. wsba_hockey/evidence/weakside-breakout/node_modules/sqlite3/node_modules/node-gyp/gyp/pylib/gyp/common_test.py +0 -78
  74. wsba_hockey/evidence/weakside-breakout/node_modules/sqlite3/node_modules/node-gyp/gyp/pylib/gyp/easy_xml.py +0 -165
  75. wsba_hockey/evidence/weakside-breakout/node_modules/sqlite3/node_modules/node-gyp/gyp/pylib/gyp/easy_xml_test.py +0 -109
  76. wsba_hockey/evidence/weakside-breakout/node_modules/sqlite3/node_modules/node-gyp/gyp/pylib/gyp/flock_tool.py +0 -55
  77. wsba_hockey/evidence/weakside-breakout/node_modules/sqlite3/node_modules/node-gyp/gyp/pylib/gyp/generator/__init__.py +0 -0
  78. wsba_hockey/evidence/weakside-breakout/node_modules/sqlite3/node_modules/node-gyp/gyp/pylib/gyp/generator/analyzer.py +0 -808
  79. wsba_hockey/evidence/weakside-breakout/node_modules/sqlite3/node_modules/node-gyp/gyp/pylib/gyp/generator/android.py +0 -1173
  80. wsba_hockey/evidence/weakside-breakout/node_modules/sqlite3/node_modules/node-gyp/gyp/pylib/gyp/generator/cmake.py +0 -1321
  81. wsba_hockey/evidence/weakside-breakout/node_modules/sqlite3/node_modules/node-gyp/gyp/pylib/gyp/generator/compile_commands_json.py +0 -120
  82. wsba_hockey/evidence/weakside-breakout/node_modules/sqlite3/node_modules/node-gyp/gyp/pylib/gyp/generator/dump_dependency_json.py +0 -103
  83. wsba_hockey/evidence/weakside-breakout/node_modules/sqlite3/node_modules/node-gyp/gyp/pylib/gyp/generator/eclipse.py +0 -464
  84. wsba_hockey/evidence/weakside-breakout/node_modules/sqlite3/node_modules/node-gyp/gyp/pylib/gyp/generator/gypd.py +0 -89
  85. wsba_hockey/evidence/weakside-breakout/node_modules/sqlite3/node_modules/node-gyp/gyp/pylib/gyp/generator/gypsh.py +0 -58
  86. wsba_hockey/evidence/weakside-breakout/node_modules/sqlite3/node_modules/node-gyp/gyp/pylib/gyp/generator/make.py +0 -2518
  87. wsba_hockey/evidence/weakside-breakout/node_modules/sqlite3/node_modules/node-gyp/gyp/pylib/gyp/generator/msvs.py +0 -3978
  88. wsba_hockey/evidence/weakside-breakout/node_modules/sqlite3/node_modules/node-gyp/gyp/pylib/gyp/generator/msvs_test.py +0 -44
  89. wsba_hockey/evidence/weakside-breakout/node_modules/sqlite3/node_modules/node-gyp/gyp/pylib/gyp/generator/ninja.py +0 -2936
  90. wsba_hockey/evidence/weakside-breakout/node_modules/sqlite3/node_modules/node-gyp/gyp/pylib/gyp/generator/ninja_test.py +0 -55
  91. wsba_hockey/evidence/weakside-breakout/node_modules/sqlite3/node_modules/node-gyp/gyp/pylib/gyp/generator/xcode.py +0 -1394
  92. wsba_hockey/evidence/weakside-breakout/node_modules/sqlite3/node_modules/node-gyp/gyp/pylib/gyp/generator/xcode_test.py +0 -25
  93. wsba_hockey/evidence/weakside-breakout/node_modules/sqlite3/node_modules/node-gyp/gyp/pylib/gyp/input.py +0 -3137
  94. wsba_hockey/evidence/weakside-breakout/node_modules/sqlite3/node_modules/node-gyp/gyp/pylib/gyp/input_test.py +0 -98
  95. wsba_hockey/evidence/weakside-breakout/node_modules/sqlite3/node_modules/node-gyp/gyp/pylib/gyp/mac_tool.py +0 -771
  96. wsba_hockey/evidence/weakside-breakout/node_modules/sqlite3/node_modules/node-gyp/gyp/pylib/gyp/msvs_emulation.py +0 -1271
  97. wsba_hockey/evidence/weakside-breakout/node_modules/sqlite3/node_modules/node-gyp/gyp/pylib/gyp/ninja_syntax.py +0 -174
  98. wsba_hockey/evidence/weakside-breakout/node_modules/sqlite3/node_modules/node-gyp/gyp/pylib/gyp/simple_copy.py +0 -61
  99. wsba_hockey/evidence/weakside-breakout/node_modules/sqlite3/node_modules/node-gyp/gyp/pylib/gyp/win_tool.py +0 -374
  100. wsba_hockey/evidence/weakside-breakout/node_modules/sqlite3/node_modules/node-gyp/gyp/pylib/gyp/xcode_emulation.py +0 -1939
  101. wsba_hockey/evidence/weakside-breakout/node_modules/sqlite3/node_modules/node-gyp/gyp/pylib/gyp/xcode_ninja.py +0 -302
  102. wsba_hockey/evidence/weakside-breakout/node_modules/sqlite3/node_modules/node-gyp/gyp/pylib/gyp/xcodeproj_file.py +0 -3197
  103. wsba_hockey/evidence/weakside-breakout/node_modules/sqlite3/node_modules/node-gyp/gyp/pylib/gyp/xml_fix.py +0 -65
  104. wsba_hockey/evidence/weakside-breakout/node_modules/sqlite3/node_modules/node-gyp/gyp/setup.py +0 -42
  105. wsba_hockey/evidence/weakside-breakout/node_modules/sqlite3/node_modules/node-gyp/gyp/test_gyp.py +0 -260
  106. wsba_hockey/evidence/weakside-breakout/node_modules/sqlite3/node_modules/node-gyp/gyp/tools/graphviz.py +0 -102
  107. wsba_hockey/evidence/weakside-breakout/node_modules/sqlite3/node_modules/node-gyp/gyp/tools/pretty_gyp.py +0 -156
  108. wsba_hockey/evidence/weakside-breakout/node_modules/sqlite3/node_modules/node-gyp/gyp/tools/pretty_sln.py +0 -181
  109. wsba_hockey/evidence/weakside-breakout/node_modules/sqlite3/node_modules/node-gyp/gyp/tools/pretty_vcproj.py +0 -339
  110. wsba_hockey/evidence/weakside-breakout/node_modules/sqlite3/node_modules/node-gyp/test/fixtures/test-charmap.py +0 -31
  111. wsba_hockey/evidence/weakside-breakout/node_modules/sqlite3/node_modules/node-gyp/update-gyp.py +0 -46
  112. wsba_hockey/evidence/weakside-breakout/wsba_nhl_apps/wsba_nhl_apps/duos/app.py +0 -210
  113. wsba_hockey/evidence/weakside-breakout/wsba_nhl_apps/wsba_nhl_apps/duos/calc.py +0 -163
  114. wsba_hockey/evidence/weakside-breakout/wsba_nhl_apps/wsba_nhl_apps/game_stats/app.py +0 -401
  115. wsba_hockey/evidence/weakside-breakout/wsba_nhl_apps/wsba_nhl_apps/game_stats/name_fix.py +0 -47
  116. wsba_hockey/evidence/weakside-breakout/wsba_nhl_apps/wsba_nhl_apps/goalie/app.py +0 -101
  117. wsba_hockey/evidence/weakside-breakout/wsba_nhl_apps/wsba_nhl_apps/goalie/plot.py +0 -71
  118. wsba_hockey/evidence/weakside-breakout/wsba_nhl_apps/wsba_nhl_apps/goalie/rink_plot.py +0 -245
  119. wsba_hockey/evidence/weakside-breakout/wsba_nhl_apps/wsba_nhl_apps/heatmaps/app.py +0 -108
  120. wsba_hockey/evidence/weakside-breakout/wsba_nhl_apps/wsba_nhl_apps/heatmaps/plot.py +0 -95
  121. wsba_hockey/evidence/weakside-breakout/wsba_nhl_apps/wsba_nhl_apps/heatmaps/rink_plot.py +0 -245
  122. wsba_hockey/evidence/weakside-breakout/wsba_nhl_apps/wsba_nhl_apps/line-combos/app.py +0 -245
  123. wsba_hockey/evidence/weakside-breakout/wsba_nhl_apps/wsba_nhl_apps/line-combos/plot.py +0 -275
  124. wsba_hockey/evidence/weakside-breakout/wsba_nhl_apps/wsba_nhl_apps/line-combos/rink_plot.py +0 -245
  125. wsba_hockey/evidence/weakside-breakout/wsba_nhl_apps/wsba_nhl_apps/matchups/app.py +0 -145
  126. wsba_hockey/evidence/weakside-breakout/wsba_nhl_apps/wsba_nhl_apps/matchups/plot.py +0 -79
  127. wsba_hockey/evidence/weakside-breakout/wsba_nhl_apps/wsba_nhl_apps/matchups/rink_plot.py +0 -245
  128. wsba_hockey/evidence/weakside-breakout/wsba_nhl_apps/wsba_nhl_apps/pbp/app.py +0 -406
  129. wsba_hockey/evidence/weakside-breakout/wsba_nhl_apps/wsba_nhl_apps/pbp/plot.py +0 -79
  130. wsba_hockey/evidence/weakside-breakout/wsba_nhl_apps/wsba_nhl_apps/pbp/rink_plot.py +0 -245
  131. wsba_hockey/evidence/weakside-breakout/wsba_nhl_apps/wsba_nhl_apps/skater/app.py +0 -110
  132. wsba_hockey/evidence/weakside-breakout/wsba_nhl_apps/wsba_nhl_apps/skater/plot.py +0 -59
  133. wsba_hockey/evidence/weakside-breakout/wsba_nhl_apps/wsba_nhl_apps/skater/rink_plot.py +0 -245
  134. wsba_hockey/evidence/weakside-breakout/wsba_nhl_apps/wsba_nhl_apps/team_heatmaps/app.py +0 -103
  135. wsba_hockey/evidence/weakside-breakout/wsba_nhl_apps/wsba_nhl_apps/team_heatmaps/plot.py +0 -95
  136. wsba_hockey/evidence/weakside-breakout/wsba_nhl_apps/wsba_nhl_apps/team_heatmaps/rink_plot.py +0 -245
  137. wsba_hockey/flask/app.py +0 -77
  138. wsba_hockey/tools/utils/config.py +0 -14
  139. wsba_hockey/tools/utils/save_pages.py +0 -133
  140. wsba_hockey/workspace.py +0 -28
  141. wsba_hockey-1.1.9.dist-info/RECORD +0 -148
  142. {wsba_hockey-1.1.9.dist-info → wsba_hockey-1.2.0.dist-info}/WHEEL +0 -0
  143. {wsba_hockey-1.1.9.dist-info → wsba_hockey-1.2.0.dist-info}/licenses/LICENSE +0 -0
  144. {wsba_hockey-1.1.9.dist-info → wsba_hockey-1.2.0.dist-info}/top_level.txt +0 -0
@@ -1,207 +1,28 @@
1
- ### CODE IN THIS DIRECTORY ORIGINALLY (FULLY OR PARITALLY) WRITTEN BY HARRY SHOMER IN THE "hockey_scraper" PACKAGE
2
-
3
- ## shared.py ##
4
- """
5
- This file is a bunch of the shared functions or just general stuff used by the different scrapers in the package.
6
- """
7
1
  import os
8
2
  import time
9
3
  import json
10
- import logging
11
- import warnings
12
- import requests
13
4
  from datetime import datetime, timedelta
14
- from requests.adapters import HTTPAdapter
15
- from urllib3.util import Retry
16
- from . import save_pages as sp
17
- from . import config
18
- import inspect
5
+ import re
6
+ from bs4 import BeautifulSoup, SoupStrainer
19
7
 
20
- # Directory where this file lives
21
- FILE_DIR = os.path.dirname(os.path.realpath(__file__))
8
+ ## SHARED FUNCTIONS ##
9
+ # Most code in this file originates (entirely or partially) from the hockey_scraper package by Harry Shomer #
22
10
 
23
- # Name and Team fixes used
24
- with open(os.path.join(FILE_DIR, "player_name_fixes.json"), "r" ,encoding="utf-8") as f:
25
- Names = json.load(f)['fixes']
11
+ dir = os.path.dirname(os.path.realpath(__file__))
26
12
 
27
- with open(os.path.join(FILE_DIR, "team_tri_codes.json"), "r" ,encoding="utf-8") as f:
13
+ with open(os.path.join(dir, "team_tri_codes.json"), "r" ,encoding="utf-8") as f:
28
14
  TEAMS = json.load(f)['teams']
29
15
 
30
-
31
- def fix_name(name):
32
- """
33
- Check if a name falls under those that need fixing. If it does...fix it.
34
-
35
- :param name: name in pbp
36
-
37
- :return: Either the given parameter or the fixed name
38
- """
39
- return Names.get(name.upper(), name.upper()).upper()
40
-
41
-
42
16
  def get_team(team):
43
- """
44
- Get the fucking team
45
- """
17
+ #Parse team header in HTML
46
18
  return TEAMS.get(team.upper(), team.upper()).upper()
47
19
 
48
-
49
- def custom_formatwarning(msg, *args, **kwargs):
50
- """
51
- Override format for standard wanings
52
- """
53
- ansi_no_color = '\033[0m'
54
- return "{msg}\n{no_color}".format(no_color=ansi_no_color, msg=msg)
55
-
56
- warnings.formatwarning = custom_formatwarning
57
-
58
-
59
- def print_error(msg):
60
- """
61
- Implement own custom error using warning module. Prints in red
62
-
63
- Reason why i still use warning for errors is so i can set to ignore them if i want to (e.g. live_scrape line 200).
64
-
65
- :param msg: Str to print
66
-
67
- :return: None
68
- """
69
- ansi_red_code = '\033[0;31m'
70
- warning_msg = "{}Error: {}".format(ansi_red_code, msg)
71
-
72
- # if config.LOG:
73
- # caller_file = os.path.basename(inspect.stack()[1].filename)
74
- # get_logger(caller_file).error(msg + " " + verbose)
75
-
76
- warnings.warn(warning_msg)
77
-
78
-
79
- def print_warning(msg):
80
- """
81
- Implement own custom warning using warning module. Prints in Orange.
82
-
83
- :param msg: Str to print
84
-
85
- :return: None
86
- """
87
- ansi_yellow_code = '\033[0;33m'
88
- warning_msg = "{}Warning: {}".format(ansi_yellow_code, msg)
89
-
90
- warnings.warn(warning_msg)
91
-
92
-
93
- def get_logger(python_file):
94
- """
95
- Create a basic logger to a log file
96
-
97
- :param python_file: File that instantiates the logger instance
98
-
99
- :return: logger
100
- """
101
- base_py_file = os.path.basename(python_file)
102
-
103
- # If already exists we don't try to recreate it
104
- if base_py_file in logging.Logger.manager.loggerDict.keys():
105
- return logging.getLogger(base_py_file)
106
-
107
- logger = logging.getLogger(base_py_file)
108
- logger.setLevel(logging.INFO)
109
-
110
- fh = logging.FileHandler("hockey_scraper_errors_{}.log".format(datetime.now().strftime("%Y-%m-%dT%H:%M:%S")))
111
- fh.setFormatter(logging.Formatter('%(asctime)s\t%(name)s\t%(levelname)s\t%(message)s', datefmt='%Y-%m-%d %I:%M:%S'))
112
- logger.addHandler(fh)
113
-
114
- return logger
115
-
116
-
117
- def log_error(err, py_file):
118
- """
119
- Log error when Logging is specified
120
-
121
- :param err: Error to log
122
- :param python_file: File that instantiates the logger instance
123
-
124
- :return: None
125
- """
126
- if config.LOG:
127
- get_logger(py_file).error(err)
128
-
129
-
130
- def get_season(date):
131
- """
132
- Get Season based on from_date
133
-
134
- There is an exception for the 2019-2020 pandemic season. Accoding to the below url:
135
- - 2019-2020 season ends in Oct. 2020
136
- - 2020-2021 season begins in November 2020
137
- - https://nhl.nbcsports.com/2020/07/10/new-nhl-critical-dates-calendar-means-an-october-free-agent-frenzy/
138
-
139
- :param date: date
140
-
141
- :return: season -> ex: 2016 for 2016-2017 season
142
- """
143
- year = date[:4]
144
- date = datetime.strptime(date, "%Y-%m-%d")
145
- initial_bound = datetime.strptime('-'.join([year, '01-01']), "%Y-%m-%d")
146
-
147
- # End bound for year1-year2 season is later for pandemic year
148
- if initial_bound <= date <= season_end_bound(year):
149
- return int(year) - 1
150
-
151
- return int(year)
152
-
153
-
154
- def season_start_bound(year):
155
- """
156
- Get start bound for a season.
157
-
158
- Notes:
159
- - There is a bug in the schedule API for 2016 that causes the pushback to 09-30
160
- - Pandemic season started in January
161
-
162
- :param year: str of year for given date
163
-
164
- :return: str of first date in season
165
- """
166
- if int(year) == 2016:
167
- return "2016-09-30"
168
-
169
- if int(year) == 2020:
170
- return '2021-01-01'
171
-
172
- return "{}-09-01".format(str(year))
173
-
174
-
175
-
176
- def season_end_bound(year):
177
- """
178
- Determine the end bound of a given season. Changes depending on if it's the pandemic season or not
179
-
180
- :param year: str of year for given date
181
-
182
- :return: Datetime obj of last date in season
183
- """
184
- normal_end_bound = datetime.strptime('-'.join([str(year), '08-31']), "%Y-%m-%d")
185
- pandemic_end_bound = datetime.strptime('-'.join([str(year), '10-31']), "%Y-%m-%d")
186
-
187
- if int(year) == 2020:
188
- return pandemic_end_bound
189
-
190
- return normal_end_bound
191
-
192
-
193
20
  def convert_to_seconds(minutes):
194
- """
195
- Return minutes elapsed in time format to seconds elapsed
196
-
197
- :param minutes: time elapsed
198
-
199
- :return: time elapsed in seconds
200
- """
21
+ #Convert time formatted as MM:SS in a period to raw seconds
201
22
  if minutes == '-16:0-':
202
- return '1200' # Sometimes in the html at the end of the game the time is -16:0-
23
+ return '1200' #Sometimes in the html at the end of the game the time is -16:0-
203
24
 
204
- # If the time is junk not much i can do
25
+ #Validate time (invalid times are generally ignored)
205
26
  try:
206
27
  x = time.strptime(minutes.strip(' '), '%M:%S')
207
28
  except ValueError:
@@ -209,189 +30,8 @@ def convert_to_seconds(minutes):
209
30
 
210
31
  return timedelta(hours=x.tm_hour, minutes=x.tm_min, seconds=x.tm_sec).total_seconds()
211
32
 
212
-
213
- def if_rescrape(user_rescrape):
214
- """
215
- If you want to re_scrape. If someone is a dumbass and feeds it a non-boolean it terminates the program
216
-
217
- Note: Only matters when you have a directory specified
218
-
219
- :param user_rescrape: Boolean
220
-
221
- :return: None
222
- """
223
- if isinstance(user_rescrape, bool):
224
- config.RESCRAPE = user_rescrape
225
- else:
226
- raise ValueError("Error: 'if_rescrape' must be a boolean. Not a {}".format(type(user_rescrape)))
227
-
228
-
229
- def add_dir(user_dir):
230
- """
231
- Add directory to store scraped docs if valid. Or create in the home dir
232
-
233
- NOTE: After this functions docs_dir is either None or a valid directory
234
-
235
- :param user_dir: If bool=True create in the home dire or if user provided directory on their machine
236
-
237
- :return: None
238
- """
239
- # False so they don't want it
240
- if not user_dir:
241
- config.DOCS_DIR = False
242
- return
243
-
244
- # Something was given
245
- # Either True or string to directory
246
- # If boolean refer to the home directory
247
- if isinstance(user_dir, bool):
248
- config.DOCS_DIR = os.path.join(os.path.expanduser('~'), "hockey_scraper_data")
249
- # Create if needed
250
- if not os.path.isdir(config.DOCS_DIR):
251
- print_warning("Creating the hockey_scraper_data directory in the home directory")
252
- os.mkdir(config.DOCS_DIR)
253
- elif isinstance(user_dir, str) and os.path.isdir(user_dir):
254
- config.DOCS_DIR = user_dir
255
- elif not (isinstance(user_dir, str) and isinstance(user_dir, bool)):
256
- config.DOCS_DIR = False
257
- print_error("The docs_dir argument provided is invalid")
258
- else:
259
- config.DOCS_DIR = False
260
- print_error("The directory specified for the saving of scraped docs doesn't exist. Therefore:"
261
- "\n1. All specified games will be scraped from their appropriate sources (NHL or ESPN)."
262
- "\n2. All scraped files will NOT be saved at all. Please either create the directory you want them to be "
263
- "deposited in or recheck the directory you typed in and start again.\n")
264
-
265
-
266
- def scrape_page(url):
267
- """
268
- Scrape a given url
269
-
270
- :param url: url for page
271
-
272
- :return: response object
273
- """
274
- response = requests.Session()
275
- retries = Retry(total=10, backoff_factor=.1)
276
- response.mount('http://', HTTPAdapter(max_retries=retries))
277
-
278
- try:
279
- response = response.get(url, timeout=5)
280
- response.raise_for_status()
281
- page = response.text
282
- except (requests.exceptions.HTTPError, requests.exceptions.ConnectionError):
283
- page = None
284
- except requests.exceptions.ReadTimeout:
285
- # If it times out and it's the schedule print an error message...otherwise just make the page = None
286
- if "schedule" in url:
287
- raise Exception("Timeout Error: The NHL API took too long to respond to our request. "
288
- "Please Try Again (you may need to try a few times before it works). ")
289
- else:
290
- print_error("Timeout Error: The server took too long to respond to our request.")
291
- page = None
292
-
293
- # Pause for 1 second - make it more if you want
294
- time.sleep(1)
295
-
296
- return page
297
-
298
-
299
-
300
- def get_file(file_info, force=False):
301
- """
302
- Get the specified file.
303
-
304
- If a docs_dir is provided we check if it exists. If it does we see if it contains that page (and saves if it
305
- doesn't). If the docs_dir doesn't exist we just scrape from the source and not save.
306
-
307
- :param file_info: Dictionary containing the info for the file.
308
- Contains the url, name, type, and season
309
- :param force: Force a rescrape. Default is False
310
-
311
- :return: page
312
- """
313
- file_info['dir'] = config.DOCS_DIR
314
-
315
- # If everything checks out we'll retrieve it, otherwise we scrape it
316
- if file_info['dir'] and sp.check_file_exists(file_info) and not config.RESCRAPE and not force:
317
- page = sp.get_page(file_info)
318
- else:
319
- page = scrape_page(file_info['url'])
320
- sp.save_page(page, file_info)
321
-
322
- return page
323
-
324
-
325
- def check_data_format(data_format):
326
- """
327
- Checks if data_format specified (if it is at all) is either None, 'Csv', or 'pandas'.
328
- It exits program with error message if input isn't good.
329
-
330
- :param data_format: data_format provided
331
-
332
- :return: Boolean - True if good
333
- """
334
- if not data_format or data_format.lower() not in ['csv', 'pandas']:
335
- raise ValueError('{} is an unspecified data format. The two options are Csv and Pandas '
336
- '(Csv is default)\n'.format(data_format))
337
-
338
-
339
- def check_valid_dates(from_date, to_date):
340
- """
341
- Check if it's a valid date range
342
-
343
- :param from_date: date should scrape from
344
- :param to_date: date should scrape to
345
-
346
- :return: None
347
- """
348
- try:
349
- if time.strptime(to_date, "%Y-%m-%d") < time.strptime(from_date, "%Y-%m-%d"):
350
- raise ValueError("Error: The second date input is earlier than the first one")
351
- except ValueError:
352
- raise ValueError("Error: Incorrect format given for dates. They must be given like 'yyyy-mm-dd' "
353
- "(ex: '2016-10-01').")
354
-
355
-
356
- def to_csv(base_file_name, df, league, file_type):
357
- """
358
- Write DataFrame to csv file
359
-
360
- :param base_file_name: name of file
361
- :param df: DataFrame
362
- :param league: nhl or nwhl
363
- :param file_type: type of file despoiting
364
-
365
- :return: None
366
- """
367
- docs_dir = config.DOCS_DIR
368
-
369
- # This was a late addition so we add support here
370
- if isinstance(docs_dir, str) and not os.path.isdir(os.path.join(docs_dir, "csvs")):
371
- os.mkdir(os.path.join(docs_dir, "csvs"))
372
-
373
- if df is not None:
374
- if isinstance(docs_dir, str):
375
- file_name = os.path.join(docs_dir, "csvs", '{}_{}_{}.csv'.format(league, file_type, base_file_name))
376
- else:
377
- file_name = '{}_{}_{}.csv'.format(league, file_type, base_file_name)
378
-
379
- print("---> {} {} data deposited in file - {}".format(league, file_type, file_name))
380
- df.to_csv(file_name, sep=',', encoding='utf-8')
381
-
382
- import re
383
- from bs4 import BeautifulSoup, SoupStrainer
384
-
385
- ## html_pbp.py ##
386
33
  def get_contents(game_html):
387
- """
388
- Uses Beautiful soup to parses the html document.
389
- Some parsers work for some pages but don't work for others....I'm not sure why so I just try them all here in order
390
-
391
- :param game_html: html doc
392
-
393
- :return: "soupified" html
394
- """
34
+ #Parse NHL HTML PBP document
395
35
  parsers = ["html5lib", "lxml", "html.parser"]
396
36
  strainer = SoupStrainer('td', attrs={'class': re.compile(r'bborder')})
397
37
 
@@ -409,16 +49,8 @@ def get_contents(game_html):
409
49
 
410
50
  return tds
411
51
 
412
- ## html_shifts.py ##
413
52
  def get_soup(shifts_html):
414
- """
415
- Uses Beautiful soup to parses the html document.
416
- Some parsers work for some pages but don't work for others....I'm not sure why so I just try them all here in order
417
-
418
- :param shifts_html: html doc
419
-
420
- :return: "soupified" html and player_shifts portion of html (it's a bunch of td tags)
421
- """
53
+ #Convert html document to soup
422
54
  parsers = ["lxml", "html.parser", "html5lib"]
423
55
 
424
56
  for parser in parsers:
@@ -430,19 +62,12 @@ def get_soup(shifts_html):
430
62
 
431
63
  return td, get_teams(soup)
432
64
 
433
-
434
65
  def get_teams(soup):
435
- """
436
- Return the team for the TOI tables and the home team
437
-
438
- :param soup: souped up html
439
-
440
- :return: list with team and home team
441
- """
66
+ #Find and return the list of teams in a given document's match (for HTML shifts parsing)
442
67
  team = soup.find('td', class_='teamHeading + border') # Team for shifts
443
68
  team = team.get_text()
444
69
 
445
- # Get Home Team
70
+ #Find home team
446
71
  teams = soup.find_all('td', {'align': 'center', 'style': 'font-size: 10px;font-weight:bold'})
447
72
  regex = re.compile(r'>(.*)<br/?>')
448
73
  home_team = regex.findall(str(teams[7]))
@@ -108,7 +108,12 @@ def fix_players(pbp):
108
108
  pbp[f'add_player_{i+1}_name'] = np.where(pbp[f'event_player_{i+1}_name'].isna(),pbp[f'event_player_{i+1}_id'].astype(str).replace(names_dict),np.nan)
109
109
  pbp[f'event_player_{i+1}_name'] = pbp[f'event_player_{i+1}_name'].combine_first(pbp[f'add_player_{i+1}_name'])
110
110
 
111
- pbp['event_goalie_name'] = pbp['event_goalie_id'].astype(str).replace(names_dict)
111
+ #For the first three pbp seasons the event_goalie_id isn't included as a column
112
+ try:
113
+ pbp['event_goalie_name'] = pbp['event_goalie_id'].astype(str).replace(names_dict)
114
+ except KeyError:
115
+ pbp['event_goalie_id'] = np.where(pbp['event_team_venue']=='home',pbp['home_goalie_id'],pbp['away_goalie_id']).astype(str)
116
+ pbp['event_goalie_name'] = pbp['event_goalie_id'].astype(str).replace(names_dict)
112
117
 
113
118
  #Add hands
114
119
  pbp['event_player_1_hand'] = pbp['event_player_1_id'].astype(str).str.replace('.0','').replace(roster_dict)
wsba_hockey/wsba_main.py CHANGED
@@ -34,7 +34,8 @@ SEASONS = [
34
34
  20212022,
35
35
  20222023,
36
36
  20232024,
37
- 20242025
37
+ 20242025,
38
+ 20252026
38
39
  ]
39
40
 
40
41
  CONVERT_SEASONS = {2007: 20072008,
@@ -54,7 +55,28 @@ CONVERT_SEASONS = {2007: 20072008,
54
55
  2021: 20212022,
55
56
  2022: 20222023,
56
57
  2023: 20232024,
57
- 2024: 20242025}
58
+ 2024: 20242025,
59
+ 2025: 20252026}
60
+
61
+ SEASON_NAMES = {20072008: '2007-08',
62
+ 20082009: '2008-09',
63
+ 20092010: '2009-10',
64
+ 20102011: '2010-11',
65
+ 20112012: '2011-12',
66
+ 20122013: '2012-13',
67
+ 20132014: '2013-14',
68
+ 20142015: '2014-15',
69
+ 20152016: '2015-16',
70
+ 20162017: '2016-17',
71
+ 20172018: '2017-18',
72
+ 20182019: '2018-19',
73
+ 20192020: '2019-20',
74
+ 20202021: '2020-21',
75
+ 20212022: '2021-22',
76
+ 20222023: '2022-23',
77
+ 20232024: '2023-24',
78
+ 20242025: '2024-25',
79
+ 20252026: '2025-26'}
58
80
 
59
81
  CONVERT_TEAM_ABBR = {'L.A':'LAK',
60
82
  'N.J':'NJD',
@@ -72,6 +94,7 @@ KNOWN_PROBS = {
72
94
  2008020259:'HTML data is completely missing for this game.',
73
95
  2008020409:'HTML data is completely missing for this game.',
74
96
  2008021077:'HTML data is completely missing for this game.',
97
+ 2008030311:'Missing shifts data for game between Pittsburgh and Carolina',
75
98
  2009020081:'HTML pbp for this game between Pittsburgh and Carolina is missing all but the period start and first faceoff events, for some reason.',
76
99
  2009020658:'Missing shifts data for game between New York Islanders and Dallas.',
77
100
  2009020885:'Missing shifts data for game between Sharks and Blue Jackets.',
@@ -99,7 +122,7 @@ def nhl_scrape_game(game_ids:list[int], split_shifts:bool = False, remove:list[s
99
122
  Given a set of game_ids (NHL API), return complete play-by-play information as requested.
100
123
 
101
124
  Args:
102
- game_ids (List[int] or ['random', int, int, int]):
125
+ game_ids (int or List[int] or ['random', int, int, int]):
103
126
  A single NHL game ID or a list of NHL game IDs to scrape, or use ['random', n, start_year, end_year] to fetch n random games.
104
127
  split_shifts (bool, optional):
105
128
  If True, returns a dict with separate 'pbp' and 'shifts' DataFrames. Default is False.
@@ -122,10 +145,13 @@ def nhl_scrape_game(game_ids:list[int], split_shifts:bool = False, remove:list[s
122
145
  - 'errors' (optional): list of game IDs that failed if errors=True
123
146
  """
124
147
 
148
+ #Wrap game_id in a list if only a single game_id is provided
149
+ game_ids = [game_ids] if type(game_ids) != list else game_ids
150
+
125
151
  pbps = []
126
152
  if game_ids[0] == 'random':
127
153
  #Randomize selection of game_ids
128
- #Some ids returned may be invalid (for example, 2020021300)
154
+ #Some ids returned may be invalid (for example, 2020022000)
129
155
  num = game_ids[1]
130
156
  start = game_ids[2] if len(game_ids) > 1 else 2007
131
157
  end = game_ids[3] if len(game_ids) > 2 else (date.today().year)-1
@@ -190,7 +216,7 @@ def nhl_scrape_game(game_ids:list[int], split_shifts:bool = False, remove:list[s
190
216
 
191
217
  #Track error
192
218
  error_ids.append(game_id)
193
-
219
+
194
220
  #Add all pbps together
195
221
  if not pbps:
196
222
  print("\rNo data returned.")
@@ -1149,6 +1175,9 @@ def nhl_calculate_stats(pbp:pd.DataFrame, type:Literal['skater','goalie','team']
1149
1175
  #Find player headshot
1150
1176
  complete['Headshot'] = 'https://assets.nhle.com/mugs/nhl/'+complete['Season'].astype(str)+'/'+complete['Team']+'/'+complete['ID'].astype(int).astype(str)+'.png'
1151
1177
 
1178
+ #Convert season name
1179
+ complete['Season'] = complete['Season'].replace(SEASON_NAMES)
1180
+
1152
1181
  head = ['Goalie','ID','Game'] if 'Game' in complete.columns else ['Goalie','ID']
1153
1182
  complete = complete[head+[
1154
1183
  "Season","Team",'WSBA',
@@ -1191,6 +1220,9 @@ def nhl_calculate_stats(pbp:pd.DataFrame, type:Literal['skater','goalie','team']
1191
1220
  complete['xGF%'] = complete['xGF']/(complete['xGF']+complete['xGA'])
1192
1221
  complete['FF%'] = complete['FF']/(complete['FF']+complete['FA'])
1193
1222
  complete['CF%'] = complete['CF']/(complete['CF']+complete['CA'])
1223
+
1224
+ #Convert season name
1225
+ complete['Season'] = complete['Season'].replace(SEASON_NAMES)
1194
1226
 
1195
1227
  head = ['Team','Game'] if 'Game' in complete.columns else ['Team']
1196
1228
  complete = complete[head+[
@@ -1286,6 +1318,9 @@ def nhl_calculate_stats(pbp:pd.DataFrame, type:Literal['skater','goalie','team']
1286
1318
  for stat in PER_SIXTY[:3]:
1287
1319
  type_metrics.append(f'{type.capitalize()}{stat}')
1288
1320
 
1321
+ #Convert season name
1322
+ complete['Season'] = complete['Season'].replace(SEASON_NAMES)
1323
+
1289
1324
  head = ['Player','ID','Game'] if 'Game' in complete.columns else ['Player','ID']
1290
1325
  complete = complete[head+[
1291
1326
  "Season","Team",'WSBA',
@@ -1529,13 +1564,13 @@ class NHL_Database:
1529
1564
  The initialized play-by-play dataset.
1530
1565
  """
1531
1566
 
1532
- print('Initializing database...')
1567
+ print(f'Initializing database "{name}"...')
1533
1568
  self.name = name
1534
1569
 
1535
1570
  if game_ids:
1536
- self.pbp = nhl_scrape_game(game_ids)
1571
+ self.pbp = nhl_apply_xG(nhl_scrape_game(game_ids))
1537
1572
  else:
1538
- self.pbp = nhl_scrape_game(['random',3,2010,2024]) if pbp.empty else pbp
1573
+ self.pbp = nhl_apply_xG(nhl_scrape_game(['random',3,2007,2024])) if pbp.empty else pbp
1539
1574
 
1540
1575
  self.games = self.pbp['game_id'].drop_duplicates().to_list()
1541
1576
  self.stats = {}
@@ -1555,7 +1590,7 @@ class NHL_Database:
1555
1590
  """
1556
1591
 
1557
1592
  print('Adding games...')
1558
- self.pbp = pd.concat([self.pbp,wsba.nhl_scrape_game(game_ids)])
1593
+ self.pbp = pd.concat([self.pbp,nhl_apply_xG(wsba.nhl_scrape_game(game_ids))])
1559
1594
 
1560
1595
  return self.pbp
1561
1596
 
@@ -1650,7 +1685,7 @@ class NHL_Database:
1650
1685
  Root folder to export data into. Defaults to `self.name/`.
1651
1686
  """
1652
1687
 
1653
- print('Exporting data...')
1688
+ print(f'Exporting data in database "{self.name}"...')
1654
1689
  start = time.perf_counter()
1655
1690
 
1656
1691
  # Use default path if none provided