wsba-hockey 1.1.8__py3-none-any.whl → 1.2.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (145) hide show
  1. wsba_hockey/__init__.py +22 -1
  2. wsba_hockey/tools/scraping.py +166 -190
  3. wsba_hockey/tools/utils/__init__.py +0 -1
  4. wsba_hockey/tools/utils/shared.py +14 -389
  5. wsba_hockey/tools/xg_model.py +6 -1
  6. wsba_hockey/wsba_main.py +330 -20
  7. {wsba_hockey-1.1.8.dist-info → wsba_hockey-1.2.0.dist-info}/METADATA +16 -15
  8. wsba_hockey-1.2.0.dist-info/RECORD +15 -0
  9. wsba_hockey/api/api/index.py +0 -162
  10. wsba_hockey/data_pipelines.py +0 -247
  11. wsba_hockey/evidence/weakside-breakout/node_modules/duckdb/vendor.py +0 -146
  12. wsba_hockey/evidence/weakside-breakout/node_modules/flatted/python/flatted.py +0 -149
  13. wsba_hockey/evidence/weakside-breakout/node_modules/flatted/python/test.py +0 -63
  14. wsba_hockey/evidence/weakside-breakout/node_modules/node-gyp/gyp/gyp_main.py +0 -45
  15. wsba_hockey/evidence/weakside-breakout/node_modules/node-gyp/gyp/pylib/gyp/MSVSNew.py +0 -367
  16. wsba_hockey/evidence/weakside-breakout/node_modules/node-gyp/gyp/pylib/gyp/MSVSProject.py +0 -206
  17. wsba_hockey/evidence/weakside-breakout/node_modules/node-gyp/gyp/pylib/gyp/MSVSSettings.py +0 -1270
  18. wsba_hockey/evidence/weakside-breakout/node_modules/node-gyp/gyp/pylib/gyp/MSVSSettings_test.py +0 -1547
  19. wsba_hockey/evidence/weakside-breakout/node_modules/node-gyp/gyp/pylib/gyp/MSVSToolFile.py +0 -59
  20. wsba_hockey/evidence/weakside-breakout/node_modules/node-gyp/gyp/pylib/gyp/MSVSUserFile.py +0 -153
  21. wsba_hockey/evidence/weakside-breakout/node_modules/node-gyp/gyp/pylib/gyp/MSVSUtil.py +0 -271
  22. wsba_hockey/evidence/weakside-breakout/node_modules/node-gyp/gyp/pylib/gyp/MSVSVersion.py +0 -574
  23. wsba_hockey/evidence/weakside-breakout/node_modules/node-gyp/gyp/pylib/gyp/__init__.py +0 -690
  24. wsba_hockey/evidence/weakside-breakout/node_modules/node-gyp/gyp/pylib/gyp/common.py +0 -661
  25. wsba_hockey/evidence/weakside-breakout/node_modules/node-gyp/gyp/pylib/gyp/common_test.py +0 -78
  26. wsba_hockey/evidence/weakside-breakout/node_modules/node-gyp/gyp/pylib/gyp/easy_xml.py +0 -165
  27. wsba_hockey/evidence/weakside-breakout/node_modules/node-gyp/gyp/pylib/gyp/easy_xml_test.py +0 -109
  28. wsba_hockey/evidence/weakside-breakout/node_modules/node-gyp/gyp/pylib/gyp/flock_tool.py +0 -55
  29. wsba_hockey/evidence/weakside-breakout/node_modules/node-gyp/gyp/pylib/gyp/generator/__init__.py +0 -0
  30. wsba_hockey/evidence/weakside-breakout/node_modules/node-gyp/gyp/pylib/gyp/generator/analyzer.py +0 -808
  31. wsba_hockey/evidence/weakside-breakout/node_modules/node-gyp/gyp/pylib/gyp/generator/android.py +0 -1173
  32. wsba_hockey/evidence/weakside-breakout/node_modules/node-gyp/gyp/pylib/gyp/generator/cmake.py +0 -1321
  33. wsba_hockey/evidence/weakside-breakout/node_modules/node-gyp/gyp/pylib/gyp/generator/compile_commands_json.py +0 -120
  34. wsba_hockey/evidence/weakside-breakout/node_modules/node-gyp/gyp/pylib/gyp/generator/dump_dependency_json.py +0 -103
  35. wsba_hockey/evidence/weakside-breakout/node_modules/node-gyp/gyp/pylib/gyp/generator/eclipse.py +0 -464
  36. wsba_hockey/evidence/weakside-breakout/node_modules/node-gyp/gyp/pylib/gyp/generator/gypd.py +0 -89
  37. wsba_hockey/evidence/weakside-breakout/node_modules/node-gyp/gyp/pylib/gyp/generator/gypsh.py +0 -58
  38. wsba_hockey/evidence/weakside-breakout/node_modules/node-gyp/gyp/pylib/gyp/generator/make.py +0 -2714
  39. wsba_hockey/evidence/weakside-breakout/node_modules/node-gyp/gyp/pylib/gyp/generator/msvs.py +0 -3981
  40. wsba_hockey/evidence/weakside-breakout/node_modules/node-gyp/gyp/pylib/gyp/generator/msvs_test.py +0 -44
  41. wsba_hockey/evidence/weakside-breakout/node_modules/node-gyp/gyp/pylib/gyp/generator/ninja.py +0 -2936
  42. wsba_hockey/evidence/weakside-breakout/node_modules/node-gyp/gyp/pylib/gyp/generator/ninja_test.py +0 -55
  43. wsba_hockey/evidence/weakside-breakout/node_modules/node-gyp/gyp/pylib/gyp/generator/xcode.py +0 -1394
  44. wsba_hockey/evidence/weakside-breakout/node_modules/node-gyp/gyp/pylib/gyp/generator/xcode_test.py +0 -25
  45. wsba_hockey/evidence/weakside-breakout/node_modules/node-gyp/gyp/pylib/gyp/input.py +0 -3130
  46. wsba_hockey/evidence/weakside-breakout/node_modules/node-gyp/gyp/pylib/gyp/input_test.py +0 -98
  47. wsba_hockey/evidence/weakside-breakout/node_modules/node-gyp/gyp/pylib/gyp/mac_tool.py +0 -771
  48. wsba_hockey/evidence/weakside-breakout/node_modules/node-gyp/gyp/pylib/gyp/msvs_emulation.py +0 -1271
  49. wsba_hockey/evidence/weakside-breakout/node_modules/node-gyp/gyp/pylib/gyp/ninja_syntax.py +0 -174
  50. wsba_hockey/evidence/weakside-breakout/node_modules/node-gyp/gyp/pylib/gyp/simple_copy.py +0 -61
  51. wsba_hockey/evidence/weakside-breakout/node_modules/node-gyp/gyp/pylib/gyp/win_tool.py +0 -374
  52. wsba_hockey/evidence/weakside-breakout/node_modules/node-gyp/gyp/pylib/gyp/xcode_emulation.py +0 -1939
  53. wsba_hockey/evidence/weakside-breakout/node_modules/node-gyp/gyp/pylib/gyp/xcode_ninja.py +0 -302
  54. wsba_hockey/evidence/weakside-breakout/node_modules/node-gyp/gyp/pylib/gyp/xcodeproj_file.py +0 -3197
  55. wsba_hockey/evidence/weakside-breakout/node_modules/node-gyp/gyp/pylib/gyp/xml_fix.py +0 -65
  56. wsba_hockey/evidence/weakside-breakout/node_modules/node-gyp/gyp/test_gyp.py +0 -261
  57. wsba_hockey/evidence/weakside-breakout/node_modules/node-gyp/gyp/tools/graphviz.py +0 -102
  58. wsba_hockey/evidence/weakside-breakout/node_modules/node-gyp/gyp/tools/pretty_gyp.py +0 -156
  59. wsba_hockey/evidence/weakside-breakout/node_modules/node-gyp/gyp/tools/pretty_sln.py +0 -181
  60. wsba_hockey/evidence/weakside-breakout/node_modules/node-gyp/gyp/tools/pretty_vcproj.py +0 -339
  61. wsba_hockey/evidence/weakside-breakout/node_modules/node-gyp/test/fixtures/test-charmap.py +0 -31
  62. wsba_hockey/evidence/weakside-breakout/node_modules/node-gyp/update-gyp.py +0 -64
  63. wsba_hockey/evidence/weakside-breakout/node_modules/sqlite3/node_modules/node-gyp/gyp/gyp_main.py +0 -45
  64. wsba_hockey/evidence/weakside-breakout/node_modules/sqlite3/node_modules/node-gyp/gyp/pylib/gyp/MSVSNew.py +0 -367
  65. wsba_hockey/evidence/weakside-breakout/node_modules/sqlite3/node_modules/node-gyp/gyp/pylib/gyp/MSVSProject.py +0 -206
  66. wsba_hockey/evidence/weakside-breakout/node_modules/sqlite3/node_modules/node-gyp/gyp/pylib/gyp/MSVSSettings.py +0 -1270
  67. wsba_hockey/evidence/weakside-breakout/node_modules/sqlite3/node_modules/node-gyp/gyp/pylib/gyp/MSVSSettings_test.py +0 -1547
  68. wsba_hockey/evidence/weakside-breakout/node_modules/sqlite3/node_modules/node-gyp/gyp/pylib/gyp/MSVSToolFile.py +0 -59
  69. wsba_hockey/evidence/weakside-breakout/node_modules/sqlite3/node_modules/node-gyp/gyp/pylib/gyp/MSVSUserFile.py +0 -153
  70. wsba_hockey/evidence/weakside-breakout/node_modules/sqlite3/node_modules/node-gyp/gyp/pylib/gyp/MSVSUtil.py +0 -271
  71. wsba_hockey/evidence/weakside-breakout/node_modules/sqlite3/node_modules/node-gyp/gyp/pylib/gyp/MSVSVersion.py +0 -574
  72. wsba_hockey/evidence/weakside-breakout/node_modules/sqlite3/node_modules/node-gyp/gyp/pylib/gyp/__init__.py +0 -666
  73. wsba_hockey/evidence/weakside-breakout/node_modules/sqlite3/node_modules/node-gyp/gyp/pylib/gyp/common.py +0 -654
  74. wsba_hockey/evidence/weakside-breakout/node_modules/sqlite3/node_modules/node-gyp/gyp/pylib/gyp/common_test.py +0 -78
  75. wsba_hockey/evidence/weakside-breakout/node_modules/sqlite3/node_modules/node-gyp/gyp/pylib/gyp/easy_xml.py +0 -165
  76. wsba_hockey/evidence/weakside-breakout/node_modules/sqlite3/node_modules/node-gyp/gyp/pylib/gyp/easy_xml_test.py +0 -109
  77. wsba_hockey/evidence/weakside-breakout/node_modules/sqlite3/node_modules/node-gyp/gyp/pylib/gyp/flock_tool.py +0 -55
  78. wsba_hockey/evidence/weakside-breakout/node_modules/sqlite3/node_modules/node-gyp/gyp/pylib/gyp/generator/__init__.py +0 -0
  79. wsba_hockey/evidence/weakside-breakout/node_modules/sqlite3/node_modules/node-gyp/gyp/pylib/gyp/generator/analyzer.py +0 -808
  80. wsba_hockey/evidence/weakside-breakout/node_modules/sqlite3/node_modules/node-gyp/gyp/pylib/gyp/generator/android.py +0 -1173
  81. wsba_hockey/evidence/weakside-breakout/node_modules/sqlite3/node_modules/node-gyp/gyp/pylib/gyp/generator/cmake.py +0 -1321
  82. wsba_hockey/evidence/weakside-breakout/node_modules/sqlite3/node_modules/node-gyp/gyp/pylib/gyp/generator/compile_commands_json.py +0 -120
  83. wsba_hockey/evidence/weakside-breakout/node_modules/sqlite3/node_modules/node-gyp/gyp/pylib/gyp/generator/dump_dependency_json.py +0 -103
  84. wsba_hockey/evidence/weakside-breakout/node_modules/sqlite3/node_modules/node-gyp/gyp/pylib/gyp/generator/eclipse.py +0 -464
  85. wsba_hockey/evidence/weakside-breakout/node_modules/sqlite3/node_modules/node-gyp/gyp/pylib/gyp/generator/gypd.py +0 -89
  86. wsba_hockey/evidence/weakside-breakout/node_modules/sqlite3/node_modules/node-gyp/gyp/pylib/gyp/generator/gypsh.py +0 -58
  87. wsba_hockey/evidence/weakside-breakout/node_modules/sqlite3/node_modules/node-gyp/gyp/pylib/gyp/generator/make.py +0 -2518
  88. wsba_hockey/evidence/weakside-breakout/node_modules/sqlite3/node_modules/node-gyp/gyp/pylib/gyp/generator/msvs.py +0 -3978
  89. wsba_hockey/evidence/weakside-breakout/node_modules/sqlite3/node_modules/node-gyp/gyp/pylib/gyp/generator/msvs_test.py +0 -44
  90. wsba_hockey/evidence/weakside-breakout/node_modules/sqlite3/node_modules/node-gyp/gyp/pylib/gyp/generator/ninja.py +0 -2936
  91. wsba_hockey/evidence/weakside-breakout/node_modules/sqlite3/node_modules/node-gyp/gyp/pylib/gyp/generator/ninja_test.py +0 -55
  92. wsba_hockey/evidence/weakside-breakout/node_modules/sqlite3/node_modules/node-gyp/gyp/pylib/gyp/generator/xcode.py +0 -1394
  93. wsba_hockey/evidence/weakside-breakout/node_modules/sqlite3/node_modules/node-gyp/gyp/pylib/gyp/generator/xcode_test.py +0 -25
  94. wsba_hockey/evidence/weakside-breakout/node_modules/sqlite3/node_modules/node-gyp/gyp/pylib/gyp/input.py +0 -3137
  95. wsba_hockey/evidence/weakside-breakout/node_modules/sqlite3/node_modules/node-gyp/gyp/pylib/gyp/input_test.py +0 -98
  96. wsba_hockey/evidence/weakside-breakout/node_modules/sqlite3/node_modules/node-gyp/gyp/pylib/gyp/mac_tool.py +0 -771
  97. wsba_hockey/evidence/weakside-breakout/node_modules/sqlite3/node_modules/node-gyp/gyp/pylib/gyp/msvs_emulation.py +0 -1271
  98. wsba_hockey/evidence/weakside-breakout/node_modules/sqlite3/node_modules/node-gyp/gyp/pylib/gyp/ninja_syntax.py +0 -174
  99. wsba_hockey/evidence/weakside-breakout/node_modules/sqlite3/node_modules/node-gyp/gyp/pylib/gyp/simple_copy.py +0 -61
  100. wsba_hockey/evidence/weakside-breakout/node_modules/sqlite3/node_modules/node-gyp/gyp/pylib/gyp/win_tool.py +0 -374
  101. wsba_hockey/evidence/weakside-breakout/node_modules/sqlite3/node_modules/node-gyp/gyp/pylib/gyp/xcode_emulation.py +0 -1939
  102. wsba_hockey/evidence/weakside-breakout/node_modules/sqlite3/node_modules/node-gyp/gyp/pylib/gyp/xcode_ninja.py +0 -302
  103. wsba_hockey/evidence/weakside-breakout/node_modules/sqlite3/node_modules/node-gyp/gyp/pylib/gyp/xcodeproj_file.py +0 -3197
  104. wsba_hockey/evidence/weakside-breakout/node_modules/sqlite3/node_modules/node-gyp/gyp/pylib/gyp/xml_fix.py +0 -65
  105. wsba_hockey/evidence/weakside-breakout/node_modules/sqlite3/node_modules/node-gyp/gyp/setup.py +0 -42
  106. wsba_hockey/evidence/weakside-breakout/node_modules/sqlite3/node_modules/node-gyp/gyp/test_gyp.py +0 -260
  107. wsba_hockey/evidence/weakside-breakout/node_modules/sqlite3/node_modules/node-gyp/gyp/tools/graphviz.py +0 -102
  108. wsba_hockey/evidence/weakside-breakout/node_modules/sqlite3/node_modules/node-gyp/gyp/tools/pretty_gyp.py +0 -156
  109. wsba_hockey/evidence/weakside-breakout/node_modules/sqlite3/node_modules/node-gyp/gyp/tools/pretty_sln.py +0 -181
  110. wsba_hockey/evidence/weakside-breakout/node_modules/sqlite3/node_modules/node-gyp/gyp/tools/pretty_vcproj.py +0 -339
  111. wsba_hockey/evidence/weakside-breakout/node_modules/sqlite3/node_modules/node-gyp/test/fixtures/test-charmap.py +0 -31
  112. wsba_hockey/evidence/weakside-breakout/node_modules/sqlite3/node_modules/node-gyp/update-gyp.py +0 -46
  113. wsba_hockey/evidence/weakside-breakout/wsba_nhl_apps/wsba_nhl_apps/duos/app.py +0 -210
  114. wsba_hockey/evidence/weakside-breakout/wsba_nhl_apps/wsba_nhl_apps/duos/calc.py +0 -163
  115. wsba_hockey/evidence/weakside-breakout/wsba_nhl_apps/wsba_nhl_apps/game_stats/app.py +0 -401
  116. wsba_hockey/evidence/weakside-breakout/wsba_nhl_apps/wsba_nhl_apps/game_stats/name_fix.py +0 -47
  117. wsba_hockey/evidence/weakside-breakout/wsba_nhl_apps/wsba_nhl_apps/goalie/app.py +0 -101
  118. wsba_hockey/evidence/weakside-breakout/wsba_nhl_apps/wsba_nhl_apps/goalie/plot.py +0 -71
  119. wsba_hockey/evidence/weakside-breakout/wsba_nhl_apps/wsba_nhl_apps/goalie/rink_plot.py +0 -245
  120. wsba_hockey/evidence/weakside-breakout/wsba_nhl_apps/wsba_nhl_apps/heatmaps/app.py +0 -108
  121. wsba_hockey/evidence/weakside-breakout/wsba_nhl_apps/wsba_nhl_apps/heatmaps/plot.py +0 -95
  122. wsba_hockey/evidence/weakside-breakout/wsba_nhl_apps/wsba_nhl_apps/heatmaps/rink_plot.py +0 -245
  123. wsba_hockey/evidence/weakside-breakout/wsba_nhl_apps/wsba_nhl_apps/line-combos/app.py +0 -245
  124. wsba_hockey/evidence/weakside-breakout/wsba_nhl_apps/wsba_nhl_apps/line-combos/plot.py +0 -275
  125. wsba_hockey/evidence/weakside-breakout/wsba_nhl_apps/wsba_nhl_apps/line-combos/rink_plot.py +0 -245
  126. wsba_hockey/evidence/weakside-breakout/wsba_nhl_apps/wsba_nhl_apps/matchups/app.py +0 -145
  127. wsba_hockey/evidence/weakside-breakout/wsba_nhl_apps/wsba_nhl_apps/matchups/plot.py +0 -79
  128. wsba_hockey/evidence/weakside-breakout/wsba_nhl_apps/wsba_nhl_apps/matchups/rink_plot.py +0 -245
  129. wsba_hockey/evidence/weakside-breakout/wsba_nhl_apps/wsba_nhl_apps/pbp/app.py +0 -406
  130. wsba_hockey/evidence/weakside-breakout/wsba_nhl_apps/wsba_nhl_apps/pbp/plot.py +0 -79
  131. wsba_hockey/evidence/weakside-breakout/wsba_nhl_apps/wsba_nhl_apps/pbp/rink_plot.py +0 -245
  132. wsba_hockey/evidence/weakside-breakout/wsba_nhl_apps/wsba_nhl_apps/skater/app.py +0 -110
  133. wsba_hockey/evidence/weakside-breakout/wsba_nhl_apps/wsba_nhl_apps/skater/plot.py +0 -59
  134. wsba_hockey/evidence/weakside-breakout/wsba_nhl_apps/wsba_nhl_apps/skater/rink_plot.py +0 -245
  135. wsba_hockey/evidence/weakside-breakout/wsba_nhl_apps/wsba_nhl_apps/team_heatmaps/app.py +0 -103
  136. wsba_hockey/evidence/weakside-breakout/wsba_nhl_apps/wsba_nhl_apps/team_heatmaps/plot.py +0 -95
  137. wsba_hockey/evidence/weakside-breakout/wsba_nhl_apps/wsba_nhl_apps/team_heatmaps/rink_plot.py +0 -245
  138. wsba_hockey/flask/app.py +0 -77
  139. wsba_hockey/tools/utils/config.py +0 -14
  140. wsba_hockey/tools/utils/save_pages.py +0 -133
  141. wsba_hockey/workspace.py +0 -31
  142. wsba_hockey-1.1.8.dist-info/RECORD +0 -148
  143. {wsba_hockey-1.1.8.dist-info → wsba_hockey-1.2.0.dist-info}/WHEEL +0 -0
  144. {wsba_hockey-1.1.8.dist-info → wsba_hockey-1.2.0.dist-info}/licenses/LICENSE +0 -0
  145. {wsba_hockey-1.1.8.dist-info → wsba_hockey-1.2.0.dist-info}/top_level.txt +0 -0
@@ -1,207 +1,28 @@
1
- ### CODE IN THIS DIRECTORY ORIGINALLY (FULLY OR PARITALLY) WRITTEN BY HARRY SHOMER IN THE "hockey_scraper" PACKAGE
2
-
3
- ## shared.py ##
4
- """
5
- This file is a bunch of the shared functions or just general stuff used by the different scrapers in the package.
6
- """
7
1
  import os
8
2
  import time
9
3
  import json
10
- import logging
11
- import warnings
12
- import requests
13
4
  from datetime import datetime, timedelta
14
- from requests.adapters import HTTPAdapter
15
- from urllib3.util import Retry
16
- from . import save_pages as sp
17
- from . import config
18
- import inspect
5
+ import re
6
+ from bs4 import BeautifulSoup, SoupStrainer
19
7
 
20
- # Directory where this file lives
21
- FILE_DIR = os.path.dirname(os.path.realpath(__file__))
8
+ ## SHARED FUNCTIONS ##
9
+ # Most code in this file originates (entirely or partially) from the hockey_scraper package by Harry Shomer #
22
10
 
23
- # Name and Team fixes used
24
- with open(os.path.join(FILE_DIR, "player_name_fixes.json"), "r" ,encoding="utf-8") as f:
25
- Names = json.load(f)['fixes']
11
+ dir = os.path.dirname(os.path.realpath(__file__))
26
12
 
27
- with open(os.path.join(FILE_DIR, "team_tri_codes.json"), "r" ,encoding="utf-8") as f:
13
+ with open(os.path.join(dir, "team_tri_codes.json"), "r" ,encoding="utf-8") as f:
28
14
  TEAMS = json.load(f)['teams']
29
15
 
30
-
31
- def fix_name(name):
32
- """
33
- Check if a name falls under those that need fixing. If it does...fix it.
34
-
35
- :param name: name in pbp
36
-
37
- :return: Either the given parameter or the fixed name
38
- """
39
- return Names.get(name.upper(), name.upper()).upper()
40
-
41
-
42
16
  def get_team(team):
43
- """
44
- Get the fucking team
45
- """
17
+ #Parse team header in HTML
46
18
  return TEAMS.get(team.upper(), team.upper()).upper()
47
19
 
48
-
49
- def custom_formatwarning(msg, *args, **kwargs):
50
- """
51
- Override format for standard wanings
52
- """
53
- ansi_no_color = '\033[0m'
54
- return "{msg}\n{no_color}".format(no_color=ansi_no_color, msg=msg)
55
-
56
- warnings.formatwarning = custom_formatwarning
57
-
58
-
59
- def print_error(msg):
60
- """
61
- Implement own custom error using warning module. Prints in red
62
-
63
- Reason why i still use warning for errors is so i can set to ignore them if i want to (e.g. live_scrape line 200).
64
-
65
- :param msg: Str to print
66
-
67
- :return: None
68
- """
69
- ansi_red_code = '\033[0;31m'
70
- warning_msg = "{}Error: {}".format(ansi_red_code, msg)
71
-
72
- # if config.LOG:
73
- # caller_file = os.path.basename(inspect.stack()[1].filename)
74
- # get_logger(caller_file).error(msg + " " + verbose)
75
-
76
- warnings.warn(warning_msg)
77
-
78
-
79
- def print_warning(msg):
80
- """
81
- Implement own custom warning using warning module. Prints in Orange.
82
-
83
- :param msg: Str to print
84
-
85
- :return: None
86
- """
87
- ansi_yellow_code = '\033[0;33m'
88
- warning_msg = "{}Warning: {}".format(ansi_yellow_code, msg)
89
-
90
- warnings.warn(warning_msg)
91
-
92
-
93
- def get_logger(python_file):
94
- """
95
- Create a basic logger to a log file
96
-
97
- :param python_file: File that instantiates the logger instance
98
-
99
- :return: logger
100
- """
101
- base_py_file = os.path.basename(python_file)
102
-
103
- # If already exists we don't try to recreate it
104
- if base_py_file in logging.Logger.manager.loggerDict.keys():
105
- return logging.getLogger(base_py_file)
106
-
107
- logger = logging.getLogger(base_py_file)
108
- logger.setLevel(logging.INFO)
109
-
110
- fh = logging.FileHandler("hockey_scraper_errors_{}.log".format(datetime.now().strftime("%Y-%m-%dT%H:%M:%S")))
111
- fh.setFormatter(logging.Formatter('%(asctime)s\t%(name)s\t%(levelname)s\t%(message)s', datefmt='%Y-%m-%d %I:%M:%S'))
112
- logger.addHandler(fh)
113
-
114
- return logger
115
-
116
-
117
- def log_error(err, py_file):
118
- """
119
- Log error when Logging is specified
120
-
121
- :param err: Error to log
122
- :param python_file: File that instantiates the logger instance
123
-
124
- :return: None
125
- """
126
- if config.LOG:
127
- get_logger(py_file).error(err)
128
-
129
-
130
- def get_season(date):
131
- """
132
- Get Season based on from_date
133
-
134
- There is an exception for the 2019-2020 pandemic season. Accoding to the below url:
135
- - 2019-2020 season ends in Oct. 2020
136
- - 2020-2021 season begins in November 2020
137
- - https://nhl.nbcsports.com/2020/07/10/new-nhl-critical-dates-calendar-means-an-october-free-agent-frenzy/
138
-
139
- :param date: date
140
-
141
- :return: season -> ex: 2016 for 2016-2017 season
142
- """
143
- year = date[:4]
144
- date = datetime.strptime(date, "%Y-%m-%d")
145
- initial_bound = datetime.strptime('-'.join([year, '01-01']), "%Y-%m-%d")
146
-
147
- # End bound for year1-year2 season is later for pandemic year
148
- if initial_bound <= date <= season_end_bound(year):
149
- return int(year) - 1
150
-
151
- return int(year)
152
-
153
-
154
- def season_start_bound(year):
155
- """
156
- Get start bound for a season.
157
-
158
- Notes:
159
- - There is a bug in the schedule API for 2016 that causes the pushback to 09-30
160
- - Pandemic season started in January
161
-
162
- :param year: str of year for given date
163
-
164
- :return: str of first date in season
165
- """
166
- if int(year) == 2016:
167
- return "2016-09-30"
168
-
169
- if int(year) == 2020:
170
- return '2021-01-01'
171
-
172
- return "{}-09-01".format(str(year))
173
-
174
-
175
-
176
- def season_end_bound(year):
177
- """
178
- Determine the end bound of a given season. Changes depending on if it's the pandemic season or not
179
-
180
- :param year: str of year for given date
181
-
182
- :return: Datetime obj of last date in season
183
- """
184
- normal_end_bound = datetime.strptime('-'.join([str(year), '08-31']), "%Y-%m-%d")
185
- pandemic_end_bound = datetime.strptime('-'.join([str(year), '10-31']), "%Y-%m-%d")
186
-
187
- if int(year) == 2020:
188
- return pandemic_end_bound
189
-
190
- return normal_end_bound
191
-
192
-
193
20
  def convert_to_seconds(minutes):
194
- """
195
- Return minutes elapsed in time format to seconds elapsed
196
-
197
- :param minutes: time elapsed
198
-
199
- :return: time elapsed in seconds
200
- """
21
+ #Convert time formatted as MM:SS in a period to raw seconds
201
22
  if minutes == '-16:0-':
202
- return '1200' # Sometimes in the html at the end of the game the time is -16:0-
23
+ return '1200' #Sometimes in the html at the end of the game the time is -16:0-
203
24
 
204
- # If the time is junk not much i can do
25
+ #Validate time (invalid times are generally ignored)
205
26
  try:
206
27
  x = time.strptime(minutes.strip(' '), '%M:%S')
207
28
  except ValueError:
@@ -209,189 +30,8 @@ def convert_to_seconds(minutes):
209
30
 
210
31
  return timedelta(hours=x.tm_hour, minutes=x.tm_min, seconds=x.tm_sec).total_seconds()
211
32
 
212
-
213
- def if_rescrape(user_rescrape):
214
- """
215
- If you want to re_scrape. If someone is a dumbass and feeds it a non-boolean it terminates the program
216
-
217
- Note: Only matters when you have a directory specified
218
-
219
- :param user_rescrape: Boolean
220
-
221
- :return: None
222
- """
223
- if isinstance(user_rescrape, bool):
224
- config.RESCRAPE = user_rescrape
225
- else:
226
- raise ValueError("Error: 'if_rescrape' must be a boolean. Not a {}".format(type(user_rescrape)))
227
-
228
-
229
- def add_dir(user_dir):
230
- """
231
- Add directory to store scraped docs if valid. Or create in the home dir
232
-
233
- NOTE: After this functions docs_dir is either None or a valid directory
234
-
235
- :param user_dir: If bool=True create in the home dire or if user provided directory on their machine
236
-
237
- :return: None
238
- """
239
- # False so they don't want it
240
- if not user_dir:
241
- config.DOCS_DIR = False
242
- return
243
-
244
- # Something was given
245
- # Either True or string to directory
246
- # If boolean refer to the home directory
247
- if isinstance(user_dir, bool):
248
- config.DOCS_DIR = os.path.join(os.path.expanduser('~'), "hockey_scraper_data")
249
- # Create if needed
250
- if not os.path.isdir(config.DOCS_DIR):
251
- print_warning("Creating the hockey_scraper_data directory in the home directory")
252
- os.mkdir(config.DOCS_DIR)
253
- elif isinstance(user_dir, str) and os.path.isdir(user_dir):
254
- config.DOCS_DIR = user_dir
255
- elif not (isinstance(user_dir, str) and isinstance(user_dir, bool)):
256
- config.DOCS_DIR = False
257
- print_error("The docs_dir argument provided is invalid")
258
- else:
259
- config.DOCS_DIR = False
260
- print_error("The directory specified for the saving of scraped docs doesn't exist. Therefore:"
261
- "\n1. All specified games will be scraped from their appropriate sources (NHL or ESPN)."
262
- "\n2. All scraped files will NOT be saved at all. Please either create the directory you want them to be "
263
- "deposited in or recheck the directory you typed in and start again.\n")
264
-
265
-
266
- def scrape_page(url):
267
- """
268
- Scrape a given url
269
-
270
- :param url: url for page
271
-
272
- :return: response object
273
- """
274
- response = requests.Session()
275
- retries = Retry(total=10, backoff_factor=.1)
276
- response.mount('http://', HTTPAdapter(max_retries=retries))
277
-
278
- try:
279
- response = response.get(url, timeout=5)
280
- response.raise_for_status()
281
- page = response.text
282
- except (requests.exceptions.HTTPError, requests.exceptions.ConnectionError):
283
- page = None
284
- except requests.exceptions.ReadTimeout:
285
- # If it times out and it's the schedule print an error message...otherwise just make the page = None
286
- if "schedule" in url:
287
- raise Exception("Timeout Error: The NHL API took too long to respond to our request. "
288
- "Please Try Again (you may need to try a few times before it works). ")
289
- else:
290
- print_error("Timeout Error: The server took too long to respond to our request.")
291
- page = None
292
-
293
- # Pause for 1 second - make it more if you want
294
- time.sleep(1)
295
-
296
- return page
297
-
298
-
299
-
300
- def get_file(file_info, force=False):
301
- """
302
- Get the specified file.
303
-
304
- If a docs_dir is provided we check if it exists. If it does we see if it contains that page (and saves if it
305
- doesn't). If the docs_dir doesn't exist we just scrape from the source and not save.
306
-
307
- :param file_info: Dictionary containing the info for the file.
308
- Contains the url, name, type, and season
309
- :param force: Force a rescrape. Default is False
310
-
311
- :return: page
312
- """
313
- file_info['dir'] = config.DOCS_DIR
314
-
315
- # If everything checks out we'll retrieve it, otherwise we scrape it
316
- if file_info['dir'] and sp.check_file_exists(file_info) and not config.RESCRAPE and not force:
317
- page = sp.get_page(file_info)
318
- else:
319
- page = scrape_page(file_info['url'])
320
- sp.save_page(page, file_info)
321
-
322
- return page
323
-
324
-
325
- def check_data_format(data_format):
326
- """
327
- Checks if data_format specified (if it is at all) is either None, 'Csv', or 'pandas'.
328
- It exits program with error message if input isn't good.
329
-
330
- :param data_format: data_format provided
331
-
332
- :return: Boolean - True if good
333
- """
334
- if not data_format or data_format.lower() not in ['csv', 'pandas']:
335
- raise ValueError('{} is an unspecified data format. The two options are Csv and Pandas '
336
- '(Csv is default)\n'.format(data_format))
337
-
338
-
339
- def check_valid_dates(from_date, to_date):
340
- """
341
- Check if it's a valid date range
342
-
343
- :param from_date: date should scrape from
344
- :param to_date: date should scrape to
345
-
346
- :return: None
347
- """
348
- try:
349
- if time.strptime(to_date, "%Y-%m-%d") < time.strptime(from_date, "%Y-%m-%d"):
350
- raise ValueError("Error: The second date input is earlier than the first one")
351
- except ValueError:
352
- raise ValueError("Error: Incorrect format given for dates. They must be given like 'yyyy-mm-dd' "
353
- "(ex: '2016-10-01').")
354
-
355
-
356
- def to_csv(base_file_name, df, league, file_type):
357
- """
358
- Write DataFrame to csv file
359
-
360
- :param base_file_name: name of file
361
- :param df: DataFrame
362
- :param league: nhl or nwhl
363
- :param file_type: type of file despoiting
364
-
365
- :return: None
366
- """
367
- docs_dir = config.DOCS_DIR
368
-
369
- # This was a late addition so we add support here
370
- if isinstance(docs_dir, str) and not os.path.isdir(os.path.join(docs_dir, "csvs")):
371
- os.mkdir(os.path.join(docs_dir, "csvs"))
372
-
373
- if df is not None:
374
- if isinstance(docs_dir, str):
375
- file_name = os.path.join(docs_dir, "csvs", '{}_{}_{}.csv'.format(league, file_type, base_file_name))
376
- else:
377
- file_name = '{}_{}_{}.csv'.format(league, file_type, base_file_name)
378
-
379
- print("---> {} {} data deposited in file - {}".format(league, file_type, file_name))
380
- df.to_csv(file_name, sep=',', encoding='utf-8')
381
-
382
- import re
383
- from bs4 import BeautifulSoup, SoupStrainer
384
-
385
- ## html_pbp.py ##
386
33
  def get_contents(game_html):
387
- """
388
- Uses Beautiful soup to parses the html document.
389
- Some parsers work for some pages but don't work for others....I'm not sure why so I just try them all here in order
390
-
391
- :param game_html: html doc
392
-
393
- :return: "soupified" html
394
- """
34
+ #Parse NHL HTML PBP document
395
35
  parsers = ["html5lib", "lxml", "html.parser"]
396
36
  strainer = SoupStrainer('td', attrs={'class': re.compile(r'bborder')})
397
37
 
@@ -409,16 +49,8 @@ def get_contents(game_html):
409
49
 
410
50
  return tds
411
51
 
412
- ## html_shifts.py ##
413
52
  def get_soup(shifts_html):
414
- """
415
- Uses Beautiful soup to parses the html document.
416
- Some parsers work for some pages but don't work for others....I'm not sure why so I just try them all here in order
417
-
418
- :param shifts_html: html doc
419
-
420
- :return: "soupified" html and player_shifts portion of html (it's a bunch of td tags)
421
- """
53
+ #Convert html document to soup
422
54
  parsers = ["lxml", "html.parser", "html5lib"]
423
55
 
424
56
  for parser in parsers:
@@ -430,19 +62,12 @@ def get_soup(shifts_html):
430
62
 
431
63
  return td, get_teams(soup)
432
64
 
433
-
434
65
  def get_teams(soup):
435
- """
436
- Return the team for the TOI tables and the home team
437
-
438
- :param soup: souped up html
439
-
440
- :return: list with team and home team
441
- """
66
+ #Find and return the list of teams for a given document's match (for HTML shifts parsing)
442
67
  team = soup.find('td', class_='teamHeading + border') # Team for shifts
443
68
  team = team.get_text()
444
69
 
445
- # Get Home Team
70
+ #Find home team
446
71
  teams = soup.find_all('td', {'align': 'center', 'style': 'font-size: 10px;font-weight:bold'})
447
72
  regex = re.compile(r'>(.*)<br/?>')
448
73
  home_team = regex.findall(str(teams[7]))
@@ -108,7 +108,12 @@ def fix_players(pbp):
108
108
  pbp[f'add_player_{i+1}_name'] = np.where(pbp[f'event_player_{i+1}_name'].isna(),pbp[f'event_player_{i+1}_id'].astype(str).replace(names_dict),np.nan)
109
109
  pbp[f'event_player_{i+1}_name'] = pbp[f'event_player_{i+1}_name'].combine_first(pbp[f'add_player_{i+1}_name'])
110
110
 
111
- pbp['event_goalie_name'] = pbp['event_goalie_id'].astype(str).replace(names_dict)
111
+ #For the first three pbp seasons the event_goalie_id isn't included as a column
112
+ try:
113
+ pbp['event_goalie_name'] = pbp['event_goalie_id'].astype(str).replace(names_dict)
114
+ except KeyError:
115
+ pbp['event_goalie_id'] = np.where(pbp['event_team_venue']=='home',pbp['home_goalie_id'],pbp['away_goalie_id']).astype(str)
116
+ pbp['event_goalie_name'] = pbp['event_goalie_id'].astype(str).replace(names_dict)
112
117
 
113
118
  #Add hands
114
119
  pbp['event_player_1_hand'] = pbp['event_player_1_id'].astype(str).str.replace('.0','').replace(roster_dict)