wsba-hockey 1.1.8__py3-none-any.whl → 1.2.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- wsba_hockey/__init__.py +22 -1
- wsba_hockey/tools/scraping.py +166 -190
- wsba_hockey/tools/utils/__init__.py +0 -1
- wsba_hockey/tools/utils/shared.py +14 -389
- wsba_hockey/tools/xg_model.py +6 -1
- wsba_hockey/wsba_main.py +330 -20
- {wsba_hockey-1.1.8.dist-info → wsba_hockey-1.2.0.dist-info}/METADATA +16 -15
- wsba_hockey-1.2.0.dist-info/RECORD +15 -0
- wsba_hockey/api/api/index.py +0 -162
- wsba_hockey/data_pipelines.py +0 -247
- wsba_hockey/evidence/weakside-breakout/node_modules/duckdb/vendor.py +0 -146
- wsba_hockey/evidence/weakside-breakout/node_modules/flatted/python/flatted.py +0 -149
- wsba_hockey/evidence/weakside-breakout/node_modules/flatted/python/test.py +0 -63
- wsba_hockey/evidence/weakside-breakout/node_modules/node-gyp/gyp/gyp_main.py +0 -45
- wsba_hockey/evidence/weakside-breakout/node_modules/node-gyp/gyp/pylib/gyp/MSVSNew.py +0 -367
- wsba_hockey/evidence/weakside-breakout/node_modules/node-gyp/gyp/pylib/gyp/MSVSProject.py +0 -206
- wsba_hockey/evidence/weakside-breakout/node_modules/node-gyp/gyp/pylib/gyp/MSVSSettings.py +0 -1270
- wsba_hockey/evidence/weakside-breakout/node_modules/node-gyp/gyp/pylib/gyp/MSVSSettings_test.py +0 -1547
- wsba_hockey/evidence/weakside-breakout/node_modules/node-gyp/gyp/pylib/gyp/MSVSToolFile.py +0 -59
- wsba_hockey/evidence/weakside-breakout/node_modules/node-gyp/gyp/pylib/gyp/MSVSUserFile.py +0 -153
- wsba_hockey/evidence/weakside-breakout/node_modules/node-gyp/gyp/pylib/gyp/MSVSUtil.py +0 -271
- wsba_hockey/evidence/weakside-breakout/node_modules/node-gyp/gyp/pylib/gyp/MSVSVersion.py +0 -574
- wsba_hockey/evidence/weakside-breakout/node_modules/node-gyp/gyp/pylib/gyp/__init__.py +0 -690
- wsba_hockey/evidence/weakside-breakout/node_modules/node-gyp/gyp/pylib/gyp/common.py +0 -661
- wsba_hockey/evidence/weakside-breakout/node_modules/node-gyp/gyp/pylib/gyp/common_test.py +0 -78
- wsba_hockey/evidence/weakside-breakout/node_modules/node-gyp/gyp/pylib/gyp/easy_xml.py +0 -165
- wsba_hockey/evidence/weakside-breakout/node_modules/node-gyp/gyp/pylib/gyp/easy_xml_test.py +0 -109
- wsba_hockey/evidence/weakside-breakout/node_modules/node-gyp/gyp/pylib/gyp/flock_tool.py +0 -55
- wsba_hockey/evidence/weakside-breakout/node_modules/node-gyp/gyp/pylib/gyp/generator/__init__.py +0 -0
- wsba_hockey/evidence/weakside-breakout/node_modules/node-gyp/gyp/pylib/gyp/generator/analyzer.py +0 -808
- wsba_hockey/evidence/weakside-breakout/node_modules/node-gyp/gyp/pylib/gyp/generator/android.py +0 -1173
- wsba_hockey/evidence/weakside-breakout/node_modules/node-gyp/gyp/pylib/gyp/generator/cmake.py +0 -1321
- wsba_hockey/evidence/weakside-breakout/node_modules/node-gyp/gyp/pylib/gyp/generator/compile_commands_json.py +0 -120
- wsba_hockey/evidence/weakside-breakout/node_modules/node-gyp/gyp/pylib/gyp/generator/dump_dependency_json.py +0 -103
- wsba_hockey/evidence/weakside-breakout/node_modules/node-gyp/gyp/pylib/gyp/generator/eclipse.py +0 -464
- wsba_hockey/evidence/weakside-breakout/node_modules/node-gyp/gyp/pylib/gyp/generator/gypd.py +0 -89
- wsba_hockey/evidence/weakside-breakout/node_modules/node-gyp/gyp/pylib/gyp/generator/gypsh.py +0 -58
- wsba_hockey/evidence/weakside-breakout/node_modules/node-gyp/gyp/pylib/gyp/generator/make.py +0 -2714
- wsba_hockey/evidence/weakside-breakout/node_modules/node-gyp/gyp/pylib/gyp/generator/msvs.py +0 -3981
- wsba_hockey/evidence/weakside-breakout/node_modules/node-gyp/gyp/pylib/gyp/generator/msvs_test.py +0 -44
- wsba_hockey/evidence/weakside-breakout/node_modules/node-gyp/gyp/pylib/gyp/generator/ninja.py +0 -2936
- wsba_hockey/evidence/weakside-breakout/node_modules/node-gyp/gyp/pylib/gyp/generator/ninja_test.py +0 -55
- wsba_hockey/evidence/weakside-breakout/node_modules/node-gyp/gyp/pylib/gyp/generator/xcode.py +0 -1394
- wsba_hockey/evidence/weakside-breakout/node_modules/node-gyp/gyp/pylib/gyp/generator/xcode_test.py +0 -25
- wsba_hockey/evidence/weakside-breakout/node_modules/node-gyp/gyp/pylib/gyp/input.py +0 -3130
- wsba_hockey/evidence/weakside-breakout/node_modules/node-gyp/gyp/pylib/gyp/input_test.py +0 -98
- wsba_hockey/evidence/weakside-breakout/node_modules/node-gyp/gyp/pylib/gyp/mac_tool.py +0 -771
- wsba_hockey/evidence/weakside-breakout/node_modules/node-gyp/gyp/pylib/gyp/msvs_emulation.py +0 -1271
- wsba_hockey/evidence/weakside-breakout/node_modules/node-gyp/gyp/pylib/gyp/ninja_syntax.py +0 -174
- wsba_hockey/evidence/weakside-breakout/node_modules/node-gyp/gyp/pylib/gyp/simple_copy.py +0 -61
- wsba_hockey/evidence/weakside-breakout/node_modules/node-gyp/gyp/pylib/gyp/win_tool.py +0 -374
- wsba_hockey/evidence/weakside-breakout/node_modules/node-gyp/gyp/pylib/gyp/xcode_emulation.py +0 -1939
- wsba_hockey/evidence/weakside-breakout/node_modules/node-gyp/gyp/pylib/gyp/xcode_ninja.py +0 -302
- wsba_hockey/evidence/weakside-breakout/node_modules/node-gyp/gyp/pylib/gyp/xcodeproj_file.py +0 -3197
- wsba_hockey/evidence/weakside-breakout/node_modules/node-gyp/gyp/pylib/gyp/xml_fix.py +0 -65
- wsba_hockey/evidence/weakside-breakout/node_modules/node-gyp/gyp/test_gyp.py +0 -261
- wsba_hockey/evidence/weakside-breakout/node_modules/node-gyp/gyp/tools/graphviz.py +0 -102
- wsba_hockey/evidence/weakside-breakout/node_modules/node-gyp/gyp/tools/pretty_gyp.py +0 -156
- wsba_hockey/evidence/weakside-breakout/node_modules/node-gyp/gyp/tools/pretty_sln.py +0 -181
- wsba_hockey/evidence/weakside-breakout/node_modules/node-gyp/gyp/tools/pretty_vcproj.py +0 -339
- wsba_hockey/evidence/weakside-breakout/node_modules/node-gyp/test/fixtures/test-charmap.py +0 -31
- wsba_hockey/evidence/weakside-breakout/node_modules/node-gyp/update-gyp.py +0 -64
- wsba_hockey/evidence/weakside-breakout/node_modules/sqlite3/node_modules/node-gyp/gyp/gyp_main.py +0 -45
- wsba_hockey/evidence/weakside-breakout/node_modules/sqlite3/node_modules/node-gyp/gyp/pylib/gyp/MSVSNew.py +0 -367
- wsba_hockey/evidence/weakside-breakout/node_modules/sqlite3/node_modules/node-gyp/gyp/pylib/gyp/MSVSProject.py +0 -206
- wsba_hockey/evidence/weakside-breakout/node_modules/sqlite3/node_modules/node-gyp/gyp/pylib/gyp/MSVSSettings.py +0 -1270
- wsba_hockey/evidence/weakside-breakout/node_modules/sqlite3/node_modules/node-gyp/gyp/pylib/gyp/MSVSSettings_test.py +0 -1547
- wsba_hockey/evidence/weakside-breakout/node_modules/sqlite3/node_modules/node-gyp/gyp/pylib/gyp/MSVSToolFile.py +0 -59
- wsba_hockey/evidence/weakside-breakout/node_modules/sqlite3/node_modules/node-gyp/gyp/pylib/gyp/MSVSUserFile.py +0 -153
- wsba_hockey/evidence/weakside-breakout/node_modules/sqlite3/node_modules/node-gyp/gyp/pylib/gyp/MSVSUtil.py +0 -271
- wsba_hockey/evidence/weakside-breakout/node_modules/sqlite3/node_modules/node-gyp/gyp/pylib/gyp/MSVSVersion.py +0 -574
- wsba_hockey/evidence/weakside-breakout/node_modules/sqlite3/node_modules/node-gyp/gyp/pylib/gyp/__init__.py +0 -666
- wsba_hockey/evidence/weakside-breakout/node_modules/sqlite3/node_modules/node-gyp/gyp/pylib/gyp/common.py +0 -654
- wsba_hockey/evidence/weakside-breakout/node_modules/sqlite3/node_modules/node-gyp/gyp/pylib/gyp/common_test.py +0 -78
- wsba_hockey/evidence/weakside-breakout/node_modules/sqlite3/node_modules/node-gyp/gyp/pylib/gyp/easy_xml.py +0 -165
- wsba_hockey/evidence/weakside-breakout/node_modules/sqlite3/node_modules/node-gyp/gyp/pylib/gyp/easy_xml_test.py +0 -109
- wsba_hockey/evidence/weakside-breakout/node_modules/sqlite3/node_modules/node-gyp/gyp/pylib/gyp/flock_tool.py +0 -55
- wsba_hockey/evidence/weakside-breakout/node_modules/sqlite3/node_modules/node-gyp/gyp/pylib/gyp/generator/__init__.py +0 -0
- wsba_hockey/evidence/weakside-breakout/node_modules/sqlite3/node_modules/node-gyp/gyp/pylib/gyp/generator/analyzer.py +0 -808
- wsba_hockey/evidence/weakside-breakout/node_modules/sqlite3/node_modules/node-gyp/gyp/pylib/gyp/generator/android.py +0 -1173
- wsba_hockey/evidence/weakside-breakout/node_modules/sqlite3/node_modules/node-gyp/gyp/pylib/gyp/generator/cmake.py +0 -1321
- wsba_hockey/evidence/weakside-breakout/node_modules/sqlite3/node_modules/node-gyp/gyp/pylib/gyp/generator/compile_commands_json.py +0 -120
- wsba_hockey/evidence/weakside-breakout/node_modules/sqlite3/node_modules/node-gyp/gyp/pylib/gyp/generator/dump_dependency_json.py +0 -103
- wsba_hockey/evidence/weakside-breakout/node_modules/sqlite3/node_modules/node-gyp/gyp/pylib/gyp/generator/eclipse.py +0 -464
- wsba_hockey/evidence/weakside-breakout/node_modules/sqlite3/node_modules/node-gyp/gyp/pylib/gyp/generator/gypd.py +0 -89
- wsba_hockey/evidence/weakside-breakout/node_modules/sqlite3/node_modules/node-gyp/gyp/pylib/gyp/generator/gypsh.py +0 -58
- wsba_hockey/evidence/weakside-breakout/node_modules/sqlite3/node_modules/node-gyp/gyp/pylib/gyp/generator/make.py +0 -2518
- wsba_hockey/evidence/weakside-breakout/node_modules/sqlite3/node_modules/node-gyp/gyp/pylib/gyp/generator/msvs.py +0 -3978
- wsba_hockey/evidence/weakside-breakout/node_modules/sqlite3/node_modules/node-gyp/gyp/pylib/gyp/generator/msvs_test.py +0 -44
- wsba_hockey/evidence/weakside-breakout/node_modules/sqlite3/node_modules/node-gyp/gyp/pylib/gyp/generator/ninja.py +0 -2936
- wsba_hockey/evidence/weakside-breakout/node_modules/sqlite3/node_modules/node-gyp/gyp/pylib/gyp/generator/ninja_test.py +0 -55
- wsba_hockey/evidence/weakside-breakout/node_modules/sqlite3/node_modules/node-gyp/gyp/pylib/gyp/generator/xcode.py +0 -1394
- wsba_hockey/evidence/weakside-breakout/node_modules/sqlite3/node_modules/node-gyp/gyp/pylib/gyp/generator/xcode_test.py +0 -25
- wsba_hockey/evidence/weakside-breakout/node_modules/sqlite3/node_modules/node-gyp/gyp/pylib/gyp/input.py +0 -3137
- wsba_hockey/evidence/weakside-breakout/node_modules/sqlite3/node_modules/node-gyp/gyp/pylib/gyp/input_test.py +0 -98
- wsba_hockey/evidence/weakside-breakout/node_modules/sqlite3/node_modules/node-gyp/gyp/pylib/gyp/mac_tool.py +0 -771
- wsba_hockey/evidence/weakside-breakout/node_modules/sqlite3/node_modules/node-gyp/gyp/pylib/gyp/msvs_emulation.py +0 -1271
- wsba_hockey/evidence/weakside-breakout/node_modules/sqlite3/node_modules/node-gyp/gyp/pylib/gyp/ninja_syntax.py +0 -174
- wsba_hockey/evidence/weakside-breakout/node_modules/sqlite3/node_modules/node-gyp/gyp/pylib/gyp/simple_copy.py +0 -61
- wsba_hockey/evidence/weakside-breakout/node_modules/sqlite3/node_modules/node-gyp/gyp/pylib/gyp/win_tool.py +0 -374
- wsba_hockey/evidence/weakside-breakout/node_modules/sqlite3/node_modules/node-gyp/gyp/pylib/gyp/xcode_emulation.py +0 -1939
- wsba_hockey/evidence/weakside-breakout/node_modules/sqlite3/node_modules/node-gyp/gyp/pylib/gyp/xcode_ninja.py +0 -302
- wsba_hockey/evidence/weakside-breakout/node_modules/sqlite3/node_modules/node-gyp/gyp/pylib/gyp/xcodeproj_file.py +0 -3197
- wsba_hockey/evidence/weakside-breakout/node_modules/sqlite3/node_modules/node-gyp/gyp/pylib/gyp/xml_fix.py +0 -65
- wsba_hockey/evidence/weakside-breakout/node_modules/sqlite3/node_modules/node-gyp/gyp/setup.py +0 -42
- wsba_hockey/evidence/weakside-breakout/node_modules/sqlite3/node_modules/node-gyp/gyp/test_gyp.py +0 -260
- wsba_hockey/evidence/weakside-breakout/node_modules/sqlite3/node_modules/node-gyp/gyp/tools/graphviz.py +0 -102
- wsba_hockey/evidence/weakside-breakout/node_modules/sqlite3/node_modules/node-gyp/gyp/tools/pretty_gyp.py +0 -156
- wsba_hockey/evidence/weakside-breakout/node_modules/sqlite3/node_modules/node-gyp/gyp/tools/pretty_sln.py +0 -181
- wsba_hockey/evidence/weakside-breakout/node_modules/sqlite3/node_modules/node-gyp/gyp/tools/pretty_vcproj.py +0 -339
- wsba_hockey/evidence/weakside-breakout/node_modules/sqlite3/node_modules/node-gyp/test/fixtures/test-charmap.py +0 -31
- wsba_hockey/evidence/weakside-breakout/node_modules/sqlite3/node_modules/node-gyp/update-gyp.py +0 -46
- wsba_hockey/evidence/weakside-breakout/wsba_nhl_apps/wsba_nhl_apps/duos/app.py +0 -210
- wsba_hockey/evidence/weakside-breakout/wsba_nhl_apps/wsba_nhl_apps/duos/calc.py +0 -163
- wsba_hockey/evidence/weakside-breakout/wsba_nhl_apps/wsba_nhl_apps/game_stats/app.py +0 -401
- wsba_hockey/evidence/weakside-breakout/wsba_nhl_apps/wsba_nhl_apps/game_stats/name_fix.py +0 -47
- wsba_hockey/evidence/weakside-breakout/wsba_nhl_apps/wsba_nhl_apps/goalie/app.py +0 -101
- wsba_hockey/evidence/weakside-breakout/wsba_nhl_apps/wsba_nhl_apps/goalie/plot.py +0 -71
- wsba_hockey/evidence/weakside-breakout/wsba_nhl_apps/wsba_nhl_apps/goalie/rink_plot.py +0 -245
- wsba_hockey/evidence/weakside-breakout/wsba_nhl_apps/wsba_nhl_apps/heatmaps/app.py +0 -108
- wsba_hockey/evidence/weakside-breakout/wsba_nhl_apps/wsba_nhl_apps/heatmaps/plot.py +0 -95
- wsba_hockey/evidence/weakside-breakout/wsba_nhl_apps/wsba_nhl_apps/heatmaps/rink_plot.py +0 -245
- wsba_hockey/evidence/weakside-breakout/wsba_nhl_apps/wsba_nhl_apps/line-combos/app.py +0 -245
- wsba_hockey/evidence/weakside-breakout/wsba_nhl_apps/wsba_nhl_apps/line-combos/plot.py +0 -275
- wsba_hockey/evidence/weakside-breakout/wsba_nhl_apps/wsba_nhl_apps/line-combos/rink_plot.py +0 -245
- wsba_hockey/evidence/weakside-breakout/wsba_nhl_apps/wsba_nhl_apps/matchups/app.py +0 -145
- wsba_hockey/evidence/weakside-breakout/wsba_nhl_apps/wsba_nhl_apps/matchups/plot.py +0 -79
- wsba_hockey/evidence/weakside-breakout/wsba_nhl_apps/wsba_nhl_apps/matchups/rink_plot.py +0 -245
- wsba_hockey/evidence/weakside-breakout/wsba_nhl_apps/wsba_nhl_apps/pbp/app.py +0 -406
- wsba_hockey/evidence/weakside-breakout/wsba_nhl_apps/wsba_nhl_apps/pbp/plot.py +0 -79
- wsba_hockey/evidence/weakside-breakout/wsba_nhl_apps/wsba_nhl_apps/pbp/rink_plot.py +0 -245
- wsba_hockey/evidence/weakside-breakout/wsba_nhl_apps/wsba_nhl_apps/skater/app.py +0 -110
- wsba_hockey/evidence/weakside-breakout/wsba_nhl_apps/wsba_nhl_apps/skater/plot.py +0 -59
- wsba_hockey/evidence/weakside-breakout/wsba_nhl_apps/wsba_nhl_apps/skater/rink_plot.py +0 -245
- wsba_hockey/evidence/weakside-breakout/wsba_nhl_apps/wsba_nhl_apps/team_heatmaps/app.py +0 -103
- wsba_hockey/evidence/weakside-breakout/wsba_nhl_apps/wsba_nhl_apps/team_heatmaps/plot.py +0 -95
- wsba_hockey/evidence/weakside-breakout/wsba_nhl_apps/wsba_nhl_apps/team_heatmaps/rink_plot.py +0 -245
- wsba_hockey/flask/app.py +0 -77
- wsba_hockey/tools/utils/config.py +0 -14
- wsba_hockey/tools/utils/save_pages.py +0 -133
- wsba_hockey/workspace.py +0 -31
- wsba_hockey-1.1.8.dist-info/RECORD +0 -148
- {wsba_hockey-1.1.8.dist-info → wsba_hockey-1.2.0.dist-info}/WHEEL +0 -0
- {wsba_hockey-1.1.8.dist-info → wsba_hockey-1.2.0.dist-info}/licenses/LICENSE +0 -0
- {wsba_hockey-1.1.8.dist-info → wsba_hockey-1.2.0.dist-info}/top_level.txt +0 -0
@@ -1,207 +1,28 @@
|
|
1
|
-
### CODE IN THIS DIRECTORY ORIGINALLY (FULLY OR PARTIALLY) WRITTEN BY HARRY SHOMER IN THE "hockey_scraper" PACKAGE
|
2
|
-
|
3
|
-
## shared.py ##
|
4
|
-
"""
|
5
|
-
This file is a bunch of the shared functions or just general stuff used by the different scrapers in the package.
|
6
|
-
"""
|
7
1
|
import os
|
8
2
|
import time
|
9
3
|
import json
|
10
|
-
import logging
|
11
|
-
import warnings
|
12
|
-
import requests
|
13
4
|
from datetime import datetime, timedelta
|
14
|
-
|
15
|
-
from
|
16
|
-
from . import save_pages as sp
|
17
|
-
from . import config
|
18
|
-
import inspect
|
5
|
+
import re
|
6
|
+
from bs4 import BeautifulSoup, SoupStrainer
|
19
7
|
|
20
|
-
|
21
|
-
|
8
|
+
## SHARED FUNCTIONS ##
|
9
|
+
# Most code in this file originates (entirely or partially) from the hockey_scraper package by Harry Shomer #
|
22
10
|
|
23
|
-
|
24
|
-
with open(os.path.join(FILE_DIR, "player_name_fixes.json"), "r" ,encoding="utf-8") as f:
|
25
|
-
Names = json.load(f)['fixes']
|
11
|
+
dir = os.path.dirname(os.path.realpath(__file__))
|
26
12
|
|
27
|
-
with open(os.path.join(
|
13
|
+
with open(os.path.join(dir, "team_tri_codes.json"), "r" ,encoding="utf-8") as f:
|
28
14
|
TEAMS = json.load(f)['teams']
|
29
15
|
|
30
|
-
|
31
|
-
def fix_name(name):
|
32
|
-
"""
|
33
|
-
Check if a name falls under those that need fixing. If it does...fix it.
|
34
|
-
|
35
|
-
:param name: name in pbp
|
36
|
-
|
37
|
-
:return: Either the given parameter or the fixed name
|
38
|
-
"""
|
39
|
-
return Names.get(name.upper(), name.upper()).upper()
|
40
|
-
|
41
|
-
|
42
16
|
def get_team(team):
|
43
|
-
|
44
|
-
Get the fucking team
|
45
|
-
"""
|
17
|
+
#Parse team header in HTML
|
46
18
|
return TEAMS.get(team.upper(), team.upper()).upper()
|
47
19
|
|
48
|
-
|
49
|
-
def custom_formatwarning(msg, *args, **kwargs):
|
50
|
-
"""
|
51
|
-
Override format for standard warnings
|
52
|
-
"""
|
53
|
-
ansi_no_color = '\033[0m'
|
54
|
-
return "{msg}\n{no_color}".format(no_color=ansi_no_color, msg=msg)
|
55
|
-
|
56
|
-
warnings.formatwarning = custom_formatwarning
|
57
|
-
|
58
|
-
|
59
|
-
def print_error(msg):
|
60
|
-
"""
|
61
|
-
Implement own custom error using warning module. Prints in red
|
62
|
-
|
63
|
-
Reason why i still use warning for errors is so i can set to ignore them if i want to (e.g. live_scrape line 200).
|
64
|
-
|
65
|
-
:param msg: Str to print
|
66
|
-
|
67
|
-
:return: None
|
68
|
-
"""
|
69
|
-
ansi_red_code = '\033[0;31m'
|
70
|
-
warning_msg = "{}Error: {}".format(ansi_red_code, msg)
|
71
|
-
|
72
|
-
# if config.LOG:
|
73
|
-
# caller_file = os.path.basename(inspect.stack()[1].filename)
|
74
|
-
# get_logger(caller_file).error(msg + " " + verbose)
|
75
|
-
|
76
|
-
warnings.warn(warning_msg)
|
77
|
-
|
78
|
-
|
79
|
-
def print_warning(msg):
|
80
|
-
"""
|
81
|
-
Implement own custom warning using warning module. Prints in Orange.
|
82
|
-
|
83
|
-
:param msg: Str to print
|
84
|
-
|
85
|
-
:return: None
|
86
|
-
"""
|
87
|
-
ansi_yellow_code = '\033[0;33m'
|
88
|
-
warning_msg = "{}Warning: {}".format(ansi_yellow_code, msg)
|
89
|
-
|
90
|
-
warnings.warn(warning_msg)
|
91
|
-
|
92
|
-
|
93
|
-
def get_logger(python_file):
|
94
|
-
"""
|
95
|
-
Create a basic logger to a log file
|
96
|
-
|
97
|
-
:param python_file: File that instantiates the logger instance
|
98
|
-
|
99
|
-
:return: logger
|
100
|
-
"""
|
101
|
-
base_py_file = os.path.basename(python_file)
|
102
|
-
|
103
|
-
# If already exists we don't try to recreate it
|
104
|
-
if base_py_file in logging.Logger.manager.loggerDict.keys():
|
105
|
-
return logging.getLogger(base_py_file)
|
106
|
-
|
107
|
-
logger = logging.getLogger(base_py_file)
|
108
|
-
logger.setLevel(logging.INFO)
|
109
|
-
|
110
|
-
fh = logging.FileHandler("hockey_scraper_errors_{}.log".format(datetime.now().strftime("%Y-%m-%dT%H:%M:%S")))
|
111
|
-
fh.setFormatter(logging.Formatter('%(asctime)s\t%(name)s\t%(levelname)s\t%(message)s', datefmt='%Y-%m-%d %I:%M:%S'))
|
112
|
-
logger.addHandler(fh)
|
113
|
-
|
114
|
-
return logger
|
115
|
-
|
116
|
-
|
117
|
-
def log_error(err, py_file):
|
118
|
-
"""
|
119
|
-
Log error when Logging is specified
|
120
|
-
|
121
|
-
:param err: Error to log
|
122
|
-
:param python_file: File that instantiates the logger instance
|
123
|
-
|
124
|
-
:return: None
|
125
|
-
"""
|
126
|
-
if config.LOG:
|
127
|
-
get_logger(py_file).error(err)
|
128
|
-
|
129
|
-
|
130
|
-
def get_season(date):
|
131
|
-
"""
|
132
|
-
Get Season based on from_date
|
133
|
-
|
134
|
-
There is an exception for the 2019-2020 pandemic season. According to the URL below:
|
135
|
-
- 2019-2020 season ends in Oct. 2020
|
136
|
-
- 2020-2021 season begins in November 2020
|
137
|
-
- https://nhl.nbcsports.com/2020/07/10/new-nhl-critical-dates-calendar-means-an-october-free-agent-frenzy/
|
138
|
-
|
139
|
-
:param date: date
|
140
|
-
|
141
|
-
:return: season -> ex: 2016 for 2016-2017 season
|
142
|
-
"""
|
143
|
-
year = date[:4]
|
144
|
-
date = datetime.strptime(date, "%Y-%m-%d")
|
145
|
-
initial_bound = datetime.strptime('-'.join([year, '01-01']), "%Y-%m-%d")
|
146
|
-
|
147
|
-
# End bound for year1-year2 season is later for pandemic year
|
148
|
-
if initial_bound <= date <= season_end_bound(year):
|
149
|
-
return int(year) - 1
|
150
|
-
|
151
|
-
return int(year)
|
152
|
-
|
153
|
-
|
154
|
-
def season_start_bound(year):
|
155
|
-
"""
|
156
|
-
Get start bound for a season.
|
157
|
-
|
158
|
-
Notes:
|
159
|
-
- There is a bug in the schedule API for 2016 that causes the pushback to 09-30
|
160
|
-
- Pandemic season started in January
|
161
|
-
|
162
|
-
:param year: str of year for given date
|
163
|
-
|
164
|
-
:return: str of first date in season
|
165
|
-
"""
|
166
|
-
if int(year) == 2016:
|
167
|
-
return "2016-09-30"
|
168
|
-
|
169
|
-
if int(year) == 2020:
|
170
|
-
return '2021-01-01'
|
171
|
-
|
172
|
-
return "{}-09-01".format(str(year))
|
173
|
-
|
174
|
-
|
175
|
-
|
176
|
-
def season_end_bound(year):
|
177
|
-
"""
|
178
|
-
Determine the end bound of a given season. Changes depending on if it's the pandemic season or not
|
179
|
-
|
180
|
-
:param year: str of year for given date
|
181
|
-
|
182
|
-
:return: Datetime obj of last date in season
|
183
|
-
"""
|
184
|
-
normal_end_bound = datetime.strptime('-'.join([str(year), '08-31']), "%Y-%m-%d")
|
185
|
-
pandemic_end_bound = datetime.strptime('-'.join([str(year), '10-31']), "%Y-%m-%d")
|
186
|
-
|
187
|
-
if int(year) == 2020:
|
188
|
-
return pandemic_end_bound
|
189
|
-
|
190
|
-
return normal_end_bound
|
191
|
-
|
192
|
-
|
193
20
|
def convert_to_seconds(minutes):
|
194
|
-
|
195
|
-
Return minutes elapsed in time format to seconds elapsed
|
196
|
-
|
197
|
-
:param minutes: time elapsed
|
198
|
-
|
199
|
-
:return: time elapsed in seconds
|
200
|
-
"""
|
21
|
+
#Convert time formatted as MM:SS in a period to raw seconds
|
201
22
|
if minutes == '-16:0-':
|
202
|
-
return '1200' #
|
23
|
+
return '1200' #Sometimes in the html at the end of the game the time is -16:0-
|
203
24
|
|
204
|
-
#
|
25
|
+
#Validate time (invalid times are generally ignored)
|
205
26
|
try:
|
206
27
|
x = time.strptime(minutes.strip(' '), '%M:%S')
|
207
28
|
except ValueError:
|
@@ -209,189 +30,8 @@ def convert_to_seconds(minutes):
|
|
209
30
|
|
210
31
|
return timedelta(hours=x.tm_hour, minutes=x.tm_min, seconds=x.tm_sec).total_seconds()
|
211
32
|
|
212
|
-
|
213
|
-
def if_rescrape(user_rescrape):
|
214
|
-
"""
|
215
|
-
If you want to re_scrape. If someone is a dumbass and feeds it a non-boolean it terminates the program
|
216
|
-
|
217
|
-
Note: Only matters when you have a directory specified
|
218
|
-
|
219
|
-
:param user_rescrape: Boolean
|
220
|
-
|
221
|
-
:return: None
|
222
|
-
"""
|
223
|
-
if isinstance(user_rescrape, bool):
|
224
|
-
config.RESCRAPE = user_rescrape
|
225
|
-
else:
|
226
|
-
raise ValueError("Error: 'if_rescrape' must be a boolean. Not a {}".format(type(user_rescrape)))
|
227
|
-
|
228
|
-
|
229
|
-
def add_dir(user_dir):
|
230
|
-
"""
|
231
|
-
Add directory to store scraped docs if valid. Or create in the home dir
|
232
|
-
|
233
|
-
NOTE: After this functions docs_dir is either None or a valid directory
|
234
|
-
|
235
|
-
:param user_dir: If True, create the directory in the home directory; otherwise a path to a user-provided directory on their machine
|
236
|
-
|
237
|
-
:return: None
|
238
|
-
"""
|
239
|
-
# False so they don't want it
|
240
|
-
if not user_dir:
|
241
|
-
config.DOCS_DIR = False
|
242
|
-
return
|
243
|
-
|
244
|
-
# Something was given
|
245
|
-
# Either True or string to directory
|
246
|
-
# If boolean refer to the home directory
|
247
|
-
if isinstance(user_dir, bool):
|
248
|
-
config.DOCS_DIR = os.path.join(os.path.expanduser('~'), "hockey_scraper_data")
|
249
|
-
# Create if needed
|
250
|
-
if not os.path.isdir(config.DOCS_DIR):
|
251
|
-
print_warning("Creating the hockey_scraper_data directory in the home directory")
|
252
|
-
os.mkdir(config.DOCS_DIR)
|
253
|
-
elif isinstance(user_dir, str) and os.path.isdir(user_dir):
|
254
|
-
config.DOCS_DIR = user_dir
|
255
|
-
elif not (isinstance(user_dir, str) and isinstance(user_dir, bool)):
|
256
|
-
config.DOCS_DIR = False
|
257
|
-
print_error("The docs_dir argument provided is invalid")
|
258
|
-
else:
|
259
|
-
config.DOCS_DIR = False
|
260
|
-
print_error("The directory specified for the saving of scraped docs doesn't exist. Therefore:"
|
261
|
-
"\n1. All specified games will be scraped from their appropriate sources (NHL or ESPN)."
|
262
|
-
"\n2. All scraped files will NOT be saved at all. Please either create the directory you want them to be "
|
263
|
-
"deposited in or recheck the directory you typed in and start again.\n")
|
264
|
-
|
265
|
-
|
266
|
-
def scrape_page(url):
|
267
|
-
"""
|
268
|
-
Scrape a given url
|
269
|
-
|
270
|
-
:param url: url for page
|
271
|
-
|
272
|
-
:return: response object
|
273
|
-
"""
|
274
|
-
response = requests.Session()
|
275
|
-
retries = Retry(total=10, backoff_factor=.1)
|
276
|
-
response.mount('http://', HTTPAdapter(max_retries=retries))
|
277
|
-
|
278
|
-
try:
|
279
|
-
response = response.get(url, timeout=5)
|
280
|
-
response.raise_for_status()
|
281
|
-
page = response.text
|
282
|
-
except (requests.exceptions.HTTPError, requests.exceptions.ConnectionError):
|
283
|
-
page = None
|
284
|
-
except requests.exceptions.ReadTimeout:
|
285
|
-
# If it times out and it's the schedule print an error message...otherwise just make the page = None
|
286
|
-
if "schedule" in url:
|
287
|
-
raise Exception("Timeout Error: The NHL API took too long to respond to our request. "
|
288
|
-
"Please Try Again (you may need to try a few times before it works). ")
|
289
|
-
else:
|
290
|
-
print_error("Timeout Error: The server took too long to respond to our request.")
|
291
|
-
page = None
|
292
|
-
|
293
|
-
# Pause for 1 second - make it more if you want
|
294
|
-
time.sleep(1)
|
295
|
-
|
296
|
-
return page
|
297
|
-
|
298
|
-
|
299
|
-
|
300
|
-
def get_file(file_info, force=False):
|
301
|
-
"""
|
302
|
-
Get the specified file.
|
303
|
-
|
304
|
-
If a docs_dir is provided we check if it exists. If it does we see if it contains that page (and saves if it
|
305
|
-
doesn't). If the docs_dir doesn't exist we just scrape from the source and not save.
|
306
|
-
|
307
|
-
:param file_info: Dictionary containing the info for the file.
|
308
|
-
Contains the url, name, type, and season
|
309
|
-
:param force: Force a rescrape. Default is False
|
310
|
-
|
311
|
-
:return: page
|
312
|
-
"""
|
313
|
-
file_info['dir'] = config.DOCS_DIR
|
314
|
-
|
315
|
-
# If everything checks out we'll retrieve it, otherwise we scrape it
|
316
|
-
if file_info['dir'] and sp.check_file_exists(file_info) and not config.RESCRAPE and not force:
|
317
|
-
page = sp.get_page(file_info)
|
318
|
-
else:
|
319
|
-
page = scrape_page(file_info['url'])
|
320
|
-
sp.save_page(page, file_info)
|
321
|
-
|
322
|
-
return page
|
323
|
-
|
324
|
-
|
325
|
-
def check_data_format(data_format):
|
326
|
-
"""
|
327
|
-
Checks if data_format specified (if it is at all) is either None, 'Csv', or 'pandas'.
|
328
|
-
It exits program with error message if input isn't good.
|
329
|
-
|
330
|
-
:param data_format: data_format provided
|
331
|
-
|
332
|
-
:return: Boolean - True if good
|
333
|
-
"""
|
334
|
-
if not data_format or data_format.lower() not in ['csv', 'pandas']:
|
335
|
-
raise ValueError('{} is an unspecified data format. The two options are Csv and Pandas '
|
336
|
-
'(Csv is default)\n'.format(data_format))
|
337
|
-
|
338
|
-
|
339
|
-
def check_valid_dates(from_date, to_date):
|
340
|
-
"""
|
341
|
-
Check if it's a valid date range
|
342
|
-
|
343
|
-
:param from_date: date should scrape from
|
344
|
-
:param to_date: date should scrape to
|
345
|
-
|
346
|
-
:return: None
|
347
|
-
"""
|
348
|
-
try:
|
349
|
-
if time.strptime(to_date, "%Y-%m-%d") < time.strptime(from_date, "%Y-%m-%d"):
|
350
|
-
raise ValueError("Error: The second date input is earlier than the first one")
|
351
|
-
except ValueError:
|
352
|
-
raise ValueError("Error: Incorrect format given for dates. They must be given like 'yyyy-mm-dd' "
|
353
|
-
"(ex: '2016-10-01').")
|
354
|
-
|
355
|
-
|
356
|
-
def to_csv(base_file_name, df, league, file_type):
|
357
|
-
"""
|
358
|
-
Write DataFrame to csv file
|
359
|
-
|
360
|
-
:param base_file_name: name of file
|
361
|
-
:param df: DataFrame
|
362
|
-
:param league: nhl or nwhl
|
363
|
-
:param file_type: type of file being deposited
|
364
|
-
|
365
|
-
:return: None
|
366
|
-
"""
|
367
|
-
docs_dir = config.DOCS_DIR
|
368
|
-
|
369
|
-
# This was a late addition so we add support here
|
370
|
-
if isinstance(docs_dir, str) and not os.path.isdir(os.path.join(docs_dir, "csvs")):
|
371
|
-
os.mkdir(os.path.join(docs_dir, "csvs"))
|
372
|
-
|
373
|
-
if df is not None:
|
374
|
-
if isinstance(docs_dir, str):
|
375
|
-
file_name = os.path.join(docs_dir, "csvs", '{}_{}_{}.csv'.format(league, file_type, base_file_name))
|
376
|
-
else:
|
377
|
-
file_name = '{}_{}_{}.csv'.format(league, file_type, base_file_name)
|
378
|
-
|
379
|
-
print("---> {} {} data deposited in file - {}".format(league, file_type, file_name))
|
380
|
-
df.to_csv(file_name, sep=',', encoding='utf-8')
|
381
|
-
|
382
|
-
import re
|
383
|
-
from bs4 import BeautifulSoup, SoupStrainer
|
384
|
-
|
385
|
-
## html_pbp.py ##
|
386
33
|
def get_contents(game_html):
|
387
|
-
|
388
|
-
Uses Beautiful Soup to parse the html document.
|
389
|
-
Some parsers work for some pages but don't work for others....I'm not sure why so I just try them all here in order
|
390
|
-
|
391
|
-
:param game_html: html doc
|
392
|
-
|
393
|
-
:return: "soupified" html
|
394
|
-
"""
|
34
|
+
#Parse NHL HTML PBP document
|
395
35
|
parsers = ["html5lib", "lxml", "html.parser"]
|
396
36
|
strainer = SoupStrainer('td', attrs={'class': re.compile(r'bborder')})
|
397
37
|
|
@@ -409,16 +49,8 @@ def get_contents(game_html):
|
|
409
49
|
|
410
50
|
return tds
|
411
51
|
|
412
|
-
## html_shifts.py ##
|
413
52
|
def get_soup(shifts_html):
|
414
|
-
|
415
|
-
Uses Beautiful Soup to parse the html document.
|
416
|
-
Some parsers work for some pages but don't work for others....I'm not sure why so I just try them all here in order
|
417
|
-
|
418
|
-
:param shifts_html: html doc
|
419
|
-
|
420
|
-
:return: "soupified" html and player_shifts portion of html (it's a bunch of td tags)
|
421
|
-
"""
|
53
|
+
#Convert html document to soup
|
422
54
|
parsers = ["lxml", "html.parser", "html5lib"]
|
423
55
|
|
424
56
|
for parser in parsers:
|
@@ -430,19 +62,12 @@ def get_soup(shifts_html):
|
|
430
62
|
|
431
63
|
return td, get_teams(soup)
|
432
64
|
|
433
|
-
|
434
65
|
def get_teams(soup):
|
435
|
-
|
436
|
-
Return the team for the TOI tables and the home team
|
437
|
-
|
438
|
-
:param soup: souped up html
|
439
|
-
|
440
|
-
:return: list with team and home team
|
441
|
-
"""
|
66
|
+
#Find and return the list of teams in a given document's match (for HTML shifts parsing)
|
442
67
|
team = soup.find('td', class_='teamHeading + border') # Team for shifts
|
443
68
|
team = team.get_text()
|
444
69
|
|
445
|
-
#
|
70
|
+
#Find home team
|
446
71
|
teams = soup.find_all('td', {'align': 'center', 'style': 'font-size: 10px;font-weight:bold'})
|
447
72
|
regex = re.compile(r'>(.*)<br/?>')
|
448
73
|
home_team = regex.findall(str(teams[7]))
|
wsba_hockey/tools/xg_model.py
CHANGED
@@ -108,7 +108,12 @@ def fix_players(pbp):
|
|
108
108
|
pbp[f'add_player_{i+1}_name'] = np.where(pbp[f'event_player_{i+1}_name'].isna(),pbp[f'event_player_{i+1}_id'].astype(str).replace(names_dict),np.nan)
|
109
109
|
pbp[f'event_player_{i+1}_name'] = pbp[f'event_player_{i+1}_name'].combine_first(pbp[f'add_player_{i+1}_name'])
|
110
110
|
|
111
|
-
pbp
|
111
|
+
#For the first three pbp seasons the event_goalie_id isn't included as a column
|
112
|
+
try:
|
113
|
+
pbp['event_goalie_name'] = pbp['event_goalie_id'].astype(str).replace(names_dict)
|
114
|
+
except KeyError:
|
115
|
+
pbp['event_goalie_id'] = np.where(pbp['event_team_venue']=='home',pbp['home_goalie_id'],pbp['away_goalie_id']).astype(str)
|
116
|
+
pbp['event_goalie_name'] = pbp['event_goalie_id'].astype(str).replace(names_dict)
|
112
117
|
|
113
118
|
#Add hands
|
114
119
|
pbp['event_player_1_hand'] = pbp['event_player_1_id'].astype(str).str.replace('.0','').replace(roster_dict)
|