wsba-hockey 1.1.9__py3-none-any.whl → 1.2.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- wsba_hockey/tools/scraping.py +146 -170
- wsba_hockey/tools/utils/__init__.py +0 -1
- wsba_hockey/tools/utils/shared.py +14 -389
- wsba_hockey/tools/xg_model.py +6 -1
- wsba_hockey/wsba_main.py +47 -14
- {wsba_hockey-1.1.9.dist-info → wsba_hockey-1.2.1.dist-info}/METADATA +16 -15
- wsba_hockey-1.2.1.dist-info/RECORD +15 -0
- wsba_hockey/api/api/index.py +0 -162
- wsba_hockey/data_pipelines.py +0 -247
- wsba_hockey/evidence/weakside-breakout/node_modules/duckdb/vendor.py +0 -146
- wsba_hockey/evidence/weakside-breakout/node_modules/flatted/python/flatted.py +0 -149
- wsba_hockey/evidence/weakside-breakout/node_modules/flatted/python/test.py +0 -63
- wsba_hockey/evidence/weakside-breakout/node_modules/node-gyp/gyp/gyp_main.py +0 -45
- wsba_hockey/evidence/weakside-breakout/node_modules/node-gyp/gyp/pylib/gyp/MSVSNew.py +0 -367
- wsba_hockey/evidence/weakside-breakout/node_modules/node-gyp/gyp/pylib/gyp/MSVSProject.py +0 -206
- wsba_hockey/evidence/weakside-breakout/node_modules/node-gyp/gyp/pylib/gyp/MSVSSettings.py +0 -1270
- wsba_hockey/evidence/weakside-breakout/node_modules/node-gyp/gyp/pylib/gyp/MSVSSettings_test.py +0 -1547
- wsba_hockey/evidence/weakside-breakout/node_modules/node-gyp/gyp/pylib/gyp/MSVSToolFile.py +0 -59
- wsba_hockey/evidence/weakside-breakout/node_modules/node-gyp/gyp/pylib/gyp/MSVSUserFile.py +0 -153
- wsba_hockey/evidence/weakside-breakout/node_modules/node-gyp/gyp/pylib/gyp/MSVSUtil.py +0 -271
- wsba_hockey/evidence/weakside-breakout/node_modules/node-gyp/gyp/pylib/gyp/MSVSVersion.py +0 -574
- wsba_hockey/evidence/weakside-breakout/node_modules/node-gyp/gyp/pylib/gyp/__init__.py +0 -690
- wsba_hockey/evidence/weakside-breakout/node_modules/node-gyp/gyp/pylib/gyp/common.py +0 -661
- wsba_hockey/evidence/weakside-breakout/node_modules/node-gyp/gyp/pylib/gyp/common_test.py +0 -78
- wsba_hockey/evidence/weakside-breakout/node_modules/node-gyp/gyp/pylib/gyp/easy_xml.py +0 -165
- wsba_hockey/evidence/weakside-breakout/node_modules/node-gyp/gyp/pylib/gyp/easy_xml_test.py +0 -109
- wsba_hockey/evidence/weakside-breakout/node_modules/node-gyp/gyp/pylib/gyp/flock_tool.py +0 -55
- wsba_hockey/evidence/weakside-breakout/node_modules/node-gyp/gyp/pylib/gyp/generator/__init__.py +0 -0
- wsba_hockey/evidence/weakside-breakout/node_modules/node-gyp/gyp/pylib/gyp/generator/analyzer.py +0 -808
- wsba_hockey/evidence/weakside-breakout/node_modules/node-gyp/gyp/pylib/gyp/generator/android.py +0 -1173
- wsba_hockey/evidence/weakside-breakout/node_modules/node-gyp/gyp/pylib/gyp/generator/cmake.py +0 -1321
- wsba_hockey/evidence/weakside-breakout/node_modules/node-gyp/gyp/pylib/gyp/generator/compile_commands_json.py +0 -120
- wsba_hockey/evidence/weakside-breakout/node_modules/node-gyp/gyp/pylib/gyp/generator/dump_dependency_json.py +0 -103
- wsba_hockey/evidence/weakside-breakout/node_modules/node-gyp/gyp/pylib/gyp/generator/eclipse.py +0 -464
- wsba_hockey/evidence/weakside-breakout/node_modules/node-gyp/gyp/pylib/gyp/generator/gypd.py +0 -89
- wsba_hockey/evidence/weakside-breakout/node_modules/node-gyp/gyp/pylib/gyp/generator/gypsh.py +0 -58
- wsba_hockey/evidence/weakside-breakout/node_modules/node-gyp/gyp/pylib/gyp/generator/make.py +0 -2714
- wsba_hockey/evidence/weakside-breakout/node_modules/node-gyp/gyp/pylib/gyp/generator/msvs.py +0 -3981
- wsba_hockey/evidence/weakside-breakout/node_modules/node-gyp/gyp/pylib/gyp/generator/msvs_test.py +0 -44
- wsba_hockey/evidence/weakside-breakout/node_modules/node-gyp/gyp/pylib/gyp/generator/ninja.py +0 -2936
- wsba_hockey/evidence/weakside-breakout/node_modules/node-gyp/gyp/pylib/gyp/generator/ninja_test.py +0 -55
- wsba_hockey/evidence/weakside-breakout/node_modules/node-gyp/gyp/pylib/gyp/generator/xcode.py +0 -1394
- wsba_hockey/evidence/weakside-breakout/node_modules/node-gyp/gyp/pylib/gyp/generator/xcode_test.py +0 -25
- wsba_hockey/evidence/weakside-breakout/node_modules/node-gyp/gyp/pylib/gyp/input.py +0 -3130
- wsba_hockey/evidence/weakside-breakout/node_modules/node-gyp/gyp/pylib/gyp/input_test.py +0 -98
- wsba_hockey/evidence/weakside-breakout/node_modules/node-gyp/gyp/pylib/gyp/mac_tool.py +0 -771
- wsba_hockey/evidence/weakside-breakout/node_modules/node-gyp/gyp/pylib/gyp/msvs_emulation.py +0 -1271
- wsba_hockey/evidence/weakside-breakout/node_modules/node-gyp/gyp/pylib/gyp/ninja_syntax.py +0 -174
- wsba_hockey/evidence/weakside-breakout/node_modules/node-gyp/gyp/pylib/gyp/simple_copy.py +0 -61
- wsba_hockey/evidence/weakside-breakout/node_modules/node-gyp/gyp/pylib/gyp/win_tool.py +0 -374
- wsba_hockey/evidence/weakside-breakout/node_modules/node-gyp/gyp/pylib/gyp/xcode_emulation.py +0 -1939
- wsba_hockey/evidence/weakside-breakout/node_modules/node-gyp/gyp/pylib/gyp/xcode_ninja.py +0 -302
- wsba_hockey/evidence/weakside-breakout/node_modules/node-gyp/gyp/pylib/gyp/xcodeproj_file.py +0 -3197
- wsba_hockey/evidence/weakside-breakout/node_modules/node-gyp/gyp/pylib/gyp/xml_fix.py +0 -65
- wsba_hockey/evidence/weakside-breakout/node_modules/node-gyp/gyp/test_gyp.py +0 -261
- wsba_hockey/evidence/weakside-breakout/node_modules/node-gyp/gyp/tools/graphviz.py +0 -102
- wsba_hockey/evidence/weakside-breakout/node_modules/node-gyp/gyp/tools/pretty_gyp.py +0 -156
- wsba_hockey/evidence/weakside-breakout/node_modules/node-gyp/gyp/tools/pretty_sln.py +0 -181
- wsba_hockey/evidence/weakside-breakout/node_modules/node-gyp/gyp/tools/pretty_vcproj.py +0 -339
- wsba_hockey/evidence/weakside-breakout/node_modules/node-gyp/test/fixtures/test-charmap.py +0 -31
- wsba_hockey/evidence/weakside-breakout/node_modules/node-gyp/update-gyp.py +0 -64
- wsba_hockey/evidence/weakside-breakout/node_modules/sqlite3/node_modules/node-gyp/gyp/gyp_main.py +0 -45
- wsba_hockey/evidence/weakside-breakout/node_modules/sqlite3/node_modules/node-gyp/gyp/pylib/gyp/MSVSNew.py +0 -367
- wsba_hockey/evidence/weakside-breakout/node_modules/sqlite3/node_modules/node-gyp/gyp/pylib/gyp/MSVSProject.py +0 -206
- wsba_hockey/evidence/weakside-breakout/node_modules/sqlite3/node_modules/node-gyp/gyp/pylib/gyp/MSVSSettings.py +0 -1270
- wsba_hockey/evidence/weakside-breakout/node_modules/sqlite3/node_modules/node-gyp/gyp/pylib/gyp/MSVSSettings_test.py +0 -1547
- wsba_hockey/evidence/weakside-breakout/node_modules/sqlite3/node_modules/node-gyp/gyp/pylib/gyp/MSVSToolFile.py +0 -59
- wsba_hockey/evidence/weakside-breakout/node_modules/sqlite3/node_modules/node-gyp/gyp/pylib/gyp/MSVSUserFile.py +0 -153
- wsba_hockey/evidence/weakside-breakout/node_modules/sqlite3/node_modules/node-gyp/gyp/pylib/gyp/MSVSUtil.py +0 -271
- wsba_hockey/evidence/weakside-breakout/node_modules/sqlite3/node_modules/node-gyp/gyp/pylib/gyp/MSVSVersion.py +0 -574
- wsba_hockey/evidence/weakside-breakout/node_modules/sqlite3/node_modules/node-gyp/gyp/pylib/gyp/__init__.py +0 -666
- wsba_hockey/evidence/weakside-breakout/node_modules/sqlite3/node_modules/node-gyp/gyp/pylib/gyp/common.py +0 -654
- wsba_hockey/evidence/weakside-breakout/node_modules/sqlite3/node_modules/node-gyp/gyp/pylib/gyp/common_test.py +0 -78
- wsba_hockey/evidence/weakside-breakout/node_modules/sqlite3/node_modules/node-gyp/gyp/pylib/gyp/easy_xml.py +0 -165
- wsba_hockey/evidence/weakside-breakout/node_modules/sqlite3/node_modules/node-gyp/gyp/pylib/gyp/easy_xml_test.py +0 -109
- wsba_hockey/evidence/weakside-breakout/node_modules/sqlite3/node_modules/node-gyp/gyp/pylib/gyp/flock_tool.py +0 -55
- wsba_hockey/evidence/weakside-breakout/node_modules/sqlite3/node_modules/node-gyp/gyp/pylib/gyp/generator/__init__.py +0 -0
- wsba_hockey/evidence/weakside-breakout/node_modules/sqlite3/node_modules/node-gyp/gyp/pylib/gyp/generator/analyzer.py +0 -808
- wsba_hockey/evidence/weakside-breakout/node_modules/sqlite3/node_modules/node-gyp/gyp/pylib/gyp/generator/android.py +0 -1173
- wsba_hockey/evidence/weakside-breakout/node_modules/sqlite3/node_modules/node-gyp/gyp/pylib/gyp/generator/cmake.py +0 -1321
- wsba_hockey/evidence/weakside-breakout/node_modules/sqlite3/node_modules/node-gyp/gyp/pylib/gyp/generator/compile_commands_json.py +0 -120
- wsba_hockey/evidence/weakside-breakout/node_modules/sqlite3/node_modules/node-gyp/gyp/pylib/gyp/generator/dump_dependency_json.py +0 -103
- wsba_hockey/evidence/weakside-breakout/node_modules/sqlite3/node_modules/node-gyp/gyp/pylib/gyp/generator/eclipse.py +0 -464
- wsba_hockey/evidence/weakside-breakout/node_modules/sqlite3/node_modules/node-gyp/gyp/pylib/gyp/generator/gypd.py +0 -89
- wsba_hockey/evidence/weakside-breakout/node_modules/sqlite3/node_modules/node-gyp/gyp/pylib/gyp/generator/gypsh.py +0 -58
- wsba_hockey/evidence/weakside-breakout/node_modules/sqlite3/node_modules/node-gyp/gyp/pylib/gyp/generator/make.py +0 -2518
- wsba_hockey/evidence/weakside-breakout/node_modules/sqlite3/node_modules/node-gyp/gyp/pylib/gyp/generator/msvs.py +0 -3978
- wsba_hockey/evidence/weakside-breakout/node_modules/sqlite3/node_modules/node-gyp/gyp/pylib/gyp/generator/msvs_test.py +0 -44
- wsba_hockey/evidence/weakside-breakout/node_modules/sqlite3/node_modules/node-gyp/gyp/pylib/gyp/generator/ninja.py +0 -2936
- wsba_hockey/evidence/weakside-breakout/node_modules/sqlite3/node_modules/node-gyp/gyp/pylib/gyp/generator/ninja_test.py +0 -55
- wsba_hockey/evidence/weakside-breakout/node_modules/sqlite3/node_modules/node-gyp/gyp/pylib/gyp/generator/xcode.py +0 -1394
- wsba_hockey/evidence/weakside-breakout/node_modules/sqlite3/node_modules/node-gyp/gyp/pylib/gyp/generator/xcode_test.py +0 -25
- wsba_hockey/evidence/weakside-breakout/node_modules/sqlite3/node_modules/node-gyp/gyp/pylib/gyp/input.py +0 -3137
- wsba_hockey/evidence/weakside-breakout/node_modules/sqlite3/node_modules/node-gyp/gyp/pylib/gyp/input_test.py +0 -98
- wsba_hockey/evidence/weakside-breakout/node_modules/sqlite3/node_modules/node-gyp/gyp/pylib/gyp/mac_tool.py +0 -771
- wsba_hockey/evidence/weakside-breakout/node_modules/sqlite3/node_modules/node-gyp/gyp/pylib/gyp/msvs_emulation.py +0 -1271
- wsba_hockey/evidence/weakside-breakout/node_modules/sqlite3/node_modules/node-gyp/gyp/pylib/gyp/ninja_syntax.py +0 -174
- wsba_hockey/evidence/weakside-breakout/node_modules/sqlite3/node_modules/node-gyp/gyp/pylib/gyp/simple_copy.py +0 -61
- wsba_hockey/evidence/weakside-breakout/node_modules/sqlite3/node_modules/node-gyp/gyp/pylib/gyp/win_tool.py +0 -374
- wsba_hockey/evidence/weakside-breakout/node_modules/sqlite3/node_modules/node-gyp/gyp/pylib/gyp/xcode_emulation.py +0 -1939
- wsba_hockey/evidence/weakside-breakout/node_modules/sqlite3/node_modules/node-gyp/gyp/pylib/gyp/xcode_ninja.py +0 -302
- wsba_hockey/evidence/weakside-breakout/node_modules/sqlite3/node_modules/node-gyp/gyp/pylib/gyp/xcodeproj_file.py +0 -3197
- wsba_hockey/evidence/weakside-breakout/node_modules/sqlite3/node_modules/node-gyp/gyp/pylib/gyp/xml_fix.py +0 -65
- wsba_hockey/evidence/weakside-breakout/node_modules/sqlite3/node_modules/node-gyp/gyp/setup.py +0 -42
- wsba_hockey/evidence/weakside-breakout/node_modules/sqlite3/node_modules/node-gyp/gyp/test_gyp.py +0 -260
- wsba_hockey/evidence/weakside-breakout/node_modules/sqlite3/node_modules/node-gyp/gyp/tools/graphviz.py +0 -102
- wsba_hockey/evidence/weakside-breakout/node_modules/sqlite3/node_modules/node-gyp/gyp/tools/pretty_gyp.py +0 -156
- wsba_hockey/evidence/weakside-breakout/node_modules/sqlite3/node_modules/node-gyp/gyp/tools/pretty_sln.py +0 -181
- wsba_hockey/evidence/weakside-breakout/node_modules/sqlite3/node_modules/node-gyp/gyp/tools/pretty_vcproj.py +0 -339
- wsba_hockey/evidence/weakside-breakout/node_modules/sqlite3/node_modules/node-gyp/test/fixtures/test-charmap.py +0 -31
- wsba_hockey/evidence/weakside-breakout/node_modules/sqlite3/node_modules/node-gyp/update-gyp.py +0 -46
- wsba_hockey/evidence/weakside-breakout/wsba_nhl_apps/wsba_nhl_apps/duos/app.py +0 -210
- wsba_hockey/evidence/weakside-breakout/wsba_nhl_apps/wsba_nhl_apps/duos/calc.py +0 -163
- wsba_hockey/evidence/weakside-breakout/wsba_nhl_apps/wsba_nhl_apps/game_stats/app.py +0 -401
- wsba_hockey/evidence/weakside-breakout/wsba_nhl_apps/wsba_nhl_apps/game_stats/name_fix.py +0 -47
- wsba_hockey/evidence/weakside-breakout/wsba_nhl_apps/wsba_nhl_apps/goalie/app.py +0 -101
- wsba_hockey/evidence/weakside-breakout/wsba_nhl_apps/wsba_nhl_apps/goalie/plot.py +0 -71
- wsba_hockey/evidence/weakside-breakout/wsba_nhl_apps/wsba_nhl_apps/goalie/rink_plot.py +0 -245
- wsba_hockey/evidence/weakside-breakout/wsba_nhl_apps/wsba_nhl_apps/heatmaps/app.py +0 -108
- wsba_hockey/evidence/weakside-breakout/wsba_nhl_apps/wsba_nhl_apps/heatmaps/plot.py +0 -95
- wsba_hockey/evidence/weakside-breakout/wsba_nhl_apps/wsba_nhl_apps/heatmaps/rink_plot.py +0 -245
- wsba_hockey/evidence/weakside-breakout/wsba_nhl_apps/wsba_nhl_apps/line-combos/app.py +0 -245
- wsba_hockey/evidence/weakside-breakout/wsba_nhl_apps/wsba_nhl_apps/line-combos/plot.py +0 -275
- wsba_hockey/evidence/weakside-breakout/wsba_nhl_apps/wsba_nhl_apps/line-combos/rink_plot.py +0 -245
- wsba_hockey/evidence/weakside-breakout/wsba_nhl_apps/wsba_nhl_apps/matchups/app.py +0 -145
- wsba_hockey/evidence/weakside-breakout/wsba_nhl_apps/wsba_nhl_apps/matchups/plot.py +0 -79
- wsba_hockey/evidence/weakside-breakout/wsba_nhl_apps/wsba_nhl_apps/matchups/rink_plot.py +0 -245
- wsba_hockey/evidence/weakside-breakout/wsba_nhl_apps/wsba_nhl_apps/pbp/app.py +0 -406
- wsba_hockey/evidence/weakside-breakout/wsba_nhl_apps/wsba_nhl_apps/pbp/plot.py +0 -79
- wsba_hockey/evidence/weakside-breakout/wsba_nhl_apps/wsba_nhl_apps/pbp/rink_plot.py +0 -245
- wsba_hockey/evidence/weakside-breakout/wsba_nhl_apps/wsba_nhl_apps/skater/app.py +0 -110
- wsba_hockey/evidence/weakside-breakout/wsba_nhl_apps/wsba_nhl_apps/skater/plot.py +0 -59
- wsba_hockey/evidence/weakside-breakout/wsba_nhl_apps/wsba_nhl_apps/skater/rink_plot.py +0 -245
- wsba_hockey/evidence/weakside-breakout/wsba_nhl_apps/wsba_nhl_apps/team_heatmaps/app.py +0 -103
- wsba_hockey/evidence/weakside-breakout/wsba_nhl_apps/wsba_nhl_apps/team_heatmaps/plot.py +0 -95
- wsba_hockey/evidence/weakside-breakout/wsba_nhl_apps/wsba_nhl_apps/team_heatmaps/rink_plot.py +0 -245
- wsba_hockey/flask/app.py +0 -77
- wsba_hockey/tools/utils/config.py +0 -14
- wsba_hockey/tools/utils/save_pages.py +0 -133
- wsba_hockey/workspace.py +0 -28
- wsba_hockey-1.1.9.dist-info/RECORD +0 -148
- {wsba_hockey-1.1.9.dist-info → wsba_hockey-1.2.1.dist-info}/WHEEL +0 -0
- {wsba_hockey-1.1.9.dist-info → wsba_hockey-1.2.1.dist-info}/licenses/LICENSE +0 -0
- {wsba_hockey-1.1.9.dist-info → wsba_hockey-1.2.1.dist-info}/top_level.txt +0 -0
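Most of the removed files above are vendored JavaScript build tooling (`node_modules`, `node-gyp`) and Evidence app code that shipped inside the 1.1.9 wheel; 1.2.1 drops them, which is why deletions dominate the counts. A quick way to double-check a file-level comparison like this against the wheels themselves is to diff their archive listings. A minimal sketch follows; the local wheel filenames are assumptions (e.g. fetched with `pip download wsba_hockey==1.1.9 --no-deps`):

```python
# Minimal sketch: compare the file listings of two locally downloaded wheels.
# The wheel filenames below are assumed local paths, not part of this diff.
from zipfile import ZipFile

OLD_WHEEL = "wsba_hockey-1.1.9-py3-none-any.whl"
NEW_WHEEL = "wsba_hockey-1.2.1-py3-none-any.whl"

old_files = set(ZipFile(OLD_WHEEL).namelist())
new_files = set(ZipFile(NEW_WHEEL).namelist())

print(f"removed ({len(old_files - new_files)}):")
print("\n".join(sorted(old_files - new_files)))
print(f"added ({len(new_files - old_files)}):")
print("\n".join(sorted(new_files - old_files)))
```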
wsba_hockey/tools/utils/shared.py
CHANGED
@@ -1,207 +1,28 @@
-### CODE IN THIS DIRECTORY ORIGINALLY (FULLY OR PARITALLY) WRITTEN BY HARRY SHOMER IN THE "hockey_scraper" PACKAGE
-
-## shared.py ##
-"""
-This file is a bunch of the shared functions or just general stuff used by the different scrapers in the package.
-"""
 import os
 import time
 import json
-import logging
-import warnings
-import requests
 from datetime import datetime, timedelta
-
-from
-from . import save_pages as sp
-from . import config
-import inspect
+import re
+from bs4 import BeautifulSoup, SoupStrainer

-
-
+## SHARED FUCNCTIONS ##
+# Most code in this file originates (entirely or partially) from the hockey_scraper package by Harry Shomer #

-
-with open(os.path.join(FILE_DIR, "player_name_fixes.json"), "r" ,encoding="utf-8") as f:
-    Names = json.load(f)['fixes']
+dir = os.path.dirname(os.path.realpath(__file__))

-with open(os.path.join(
+with open(os.path.join(dir, "team_tri_codes.json"), "r" ,encoding="utf-8") as f:
     TEAMS = json.load(f)['teams']

-
-def fix_name(name):
-    """
-    Check if a name falls under those that need fixing. If it does...fix it.
-
-    :param name: name in pbp
-
-    :return: Either the given parameter or the fixed name
-    """
-    return Names.get(name.upper(), name.upper()).upper()
-
-
 def get_team(team):
-    """
-    Get the fucking team
-    """
+    #Parse team header in HTML
     return TEAMS.get(team.upper(), team.upper()).upper()

-
-def custom_formatwarning(msg, *args, **kwargs):
-    """
-    Override format for standard wanings
-    """
-    ansi_no_color = '\033[0m'
-    return "{msg}\n{no_color}".format(no_color=ansi_no_color, msg=msg)
-
-warnings.formatwarning = custom_formatwarning
-
-
-def print_error(msg):
-    """
-    Implement own custom error using warning module. Prints in red
-
-    Reason why i still use warning for errors is so i can set to ignore them if i want to (e.g. live_scrape line 200).
-
-    :param msg: Str to print
-
-    :return: None
-    """
-    ansi_red_code = '\033[0;31m'
-    warning_msg = "{}Error: {}".format(ansi_red_code, msg)
-
-    # if config.LOG:
-    #     caller_file = os.path.basename(inspect.stack()[1].filename)
-    #     get_logger(caller_file).error(msg + " " + verbose)
-
-    warnings.warn(warning_msg)
-
-
-def print_warning(msg):
-    """
-    Implement own custom warning using warning module. Prints in Orange.
-
-    :param msg: Str to print
-
-    :return: None
-    """
-    ansi_yellow_code = '\033[0;33m'
-    warning_msg = "{}Warning: {}".format(ansi_yellow_code, msg)
-
-    warnings.warn(warning_msg)
-
-
-def get_logger(python_file):
-    """
-    Create a basic logger to a log file
-
-    :param python_file: File that instantiates the logger instance
-
-    :return: logger
-    """
-    base_py_file = os.path.basename(python_file)
-
-    # If already exists we don't try to recreate it
-    if base_py_file in logging.Logger.manager.loggerDict.keys():
-        return logging.getLogger(base_py_file)
-
-    logger = logging.getLogger(base_py_file)
-    logger.setLevel(logging.INFO)
-
-    fh = logging.FileHandler("hockey_scraper_errors_{}.log".format(datetime.now().strftime("%Y-%m-%dT%H:%M:%S")))
-    fh.setFormatter(logging.Formatter('%(asctime)s\t%(name)s\t%(levelname)s\t%(message)s', datefmt='%Y-%m-%d %I:%M:%S'))
-    logger.addHandler(fh)
-
-    return logger
-
-
-def log_error(err, py_file):
-    """
-    Log error when Logging is specified
-
-    :param err: Error to log
-    :param python_file: File that instantiates the logger instance
-
-    :return: None
-    """
-    if config.LOG:
-        get_logger(py_file).error(err)
-
-
-def get_season(date):
-    """
-    Get Season based on from_date
-
-    There is an exception for the 2019-2020 pandemic season. Accoding to the below url:
-    - 2019-2020 season ends in Oct. 2020
-    - 2020-2021 season begins in November 2020
-    - https://nhl.nbcsports.com/2020/07/10/new-nhl-critical-dates-calendar-means-an-october-free-agent-frenzy/
-
-    :param date: date
-
-    :return: season -> ex: 2016 for 2016-2017 season
-    """
-    year = date[:4]
-    date = datetime.strptime(date, "%Y-%m-%d")
-    initial_bound = datetime.strptime('-'.join([year, '01-01']), "%Y-%m-%d")
-
-    # End bound for year1-year2 season is later for pandemic year
-    if initial_bound <= date <= season_end_bound(year):
-        return int(year) - 1
-
-    return int(year)
-
-
-def season_start_bound(year):
-    """
-    Get start bound for a season.
-
-    Notes:
-    - There is a bug in the schedule API for 2016 that causes the pushback to 09-30
-    - Pandemic season started in January
-
-    :param year: str of year for given date
-
-    :return: str of first date in season
-    """
-    if int(year) == 2016:
-        return "2016-09-30"
-
-    if int(year) == 2020:
-        return '2021-01-01'
-
-    return "{}-09-01".format(str(year))
-
-
-
-def season_end_bound(year):
-    """
-    Determine the end bound of a given season. Changes depending on if it's the pandemic season or not
-
-    :param year: str of year for given date
-
-    :return: Datetime obj of last date in season
-    """
-    normal_end_bound = datetime.strptime('-'.join([str(year), '08-31']), "%Y-%m-%d")
-    pandemic_end_bound = datetime.strptime('-'.join([str(year), '10-31']), "%Y-%m-%d")
-
-    if int(year) == 2020:
-        return pandemic_end_bound
-
-    return normal_end_bound
-
-
 def convert_to_seconds(minutes):
-    """
-    Return minutes elapsed in time format to seconds elapsed
-
-    :param minutes: time elapsed
-
-    :return: time elapsed in seconds
-    """
+    #Convert time formatted as MM:SS in a period to raw seconds
     if minutes == '-16:0-':
-        return '1200' #
+        return '1200' #Sometimes in the html at the end of the game the time is -16:0-

-    #
+    #Validate time (invalid times are generally ignored)
     try:
         x = time.strptime(minutes.strip(' '), '%M:%S')
     except ValueError:
@@ -209,189 +30,8 @@ def convert_to_seconds(minutes):

     return timedelta(hours=x.tm_hour, minutes=x.tm_min, seconds=x.tm_sec).total_seconds()

-
-def if_rescrape(user_rescrape):
-    """
-    If you want to re_scrape. If someone is a dumbass and feeds it a non-boolean it terminates the program
-
-    Note: Only matters when you have a directory specified
-
-    :param user_rescrape: Boolean
-
-    :return: None
-    """
-    if isinstance(user_rescrape, bool):
-        config.RESCRAPE = user_rescrape
-    else:
-        raise ValueError("Error: 'if_rescrape' must be a boolean. Not a {}".format(type(user_rescrape)))
-
-
-def add_dir(user_dir):
-    """
-    Add directory to store scraped docs if valid. Or create in the home dir
-
-    NOTE: After this functions docs_dir is either None or a valid directory
-
-    :param user_dir: If bool=True create in the home dire or if user provided directory on their machine
-
-    :return: None
-    """
-    # False so they don't want it
-    if not user_dir:
-        config.DOCS_DIR = False
-        return
-
-    # Something was given
-    # Either True or string to directory
-    # If boolean refer to the home directory
-    if isinstance(user_dir, bool):
-        config.DOCS_DIR = os.path.join(os.path.expanduser('~'), "hockey_scraper_data")
-        # Create if needed
-        if not os.path.isdir(config.DOCS_DIR):
-            print_warning("Creating the hockey_scraper_data directory in the home directory")
-            os.mkdir(config.DOCS_DIR)
-    elif isinstance(user_dir, str) and os.path.isdir(user_dir):
-        config.DOCS_DIR = user_dir
-    elif not (isinstance(user_dir, str) and isinstance(user_dir, bool)):
-        config.DOCS_DIR = False
-        print_error("The docs_dir argument provided is invalid")
-    else:
-        config.DOCS_DIR = False
-        print_error("The directory specified for the saving of scraped docs doesn't exist. Therefore:"
-                    "\n1. All specified games will be scraped from their appropriate sources (NHL or ESPN)."
-                    "\n2. All scraped files will NOT be saved at all. Please either create the directory you want them to be "
-                    "deposited in or recheck the directory you typed in and start again.\n")
-
-
-def scrape_page(url):
-    """
-    Scrape a given url
-
-    :param url: url for page
-
-    :return: response object
-    """
-    response = requests.Session()
-    retries = Retry(total=10, backoff_factor=.1)
-    response.mount('http://', HTTPAdapter(max_retries=retries))
-
-    try:
-        response = response.get(url, timeout=5)
-        response.raise_for_status()
-        page = response.text
-    except (requests.exceptions.HTTPError, requests.exceptions.ConnectionError):
-        page = None
-    except requests.exceptions.ReadTimeout:
-        # If it times out and it's the schedule print an error message...otherwise just make the page = None
-        if "schedule" in url:
-            raise Exception("Timeout Error: The NHL API took too long to respond to our request. "
-                            "Please Try Again (you may need to try a few times before it works). ")
-        else:
-            print_error("Timeout Error: The server took too long to respond to our request.")
-            page = None
-
-    # Pause for 1 second - make it more if you want
-    time.sleep(1)
-
-    return page
-
-
-
-def get_file(file_info, force=False):
-    """
-    Get the specified file.
-
-    If a docs_dir is provided we check if it exists. If it does we see if it contains that page (and saves if it
-    doesn't). If the docs_dir doesn't exist we just scrape from the source and not save.
-
-    :param file_info: Dictionary containing the info for the file.
-                      Contains the url, name, type, and season
-    :param force: Force a rescrape. Default is False
-
-    :return: page
-    """
-    file_info['dir'] = config.DOCS_DIR
-
-    # If everything checks out we'll retrieve it, otherwise we scrape it
-    if file_info['dir'] and sp.check_file_exists(file_info) and not config.RESCRAPE and not force:
-        page = sp.get_page(file_info)
-    else:
-        page = scrape_page(file_info['url'])
-        sp.save_page(page, file_info)
-
-    return page
-
-
-def check_data_format(data_format):
-    """
-    Checks if data_format specified (if it is at all) is either None, 'Csv', or 'pandas'.
-    It exits program with error message if input isn't good.
-
-    :param data_format: data_format provided
-
-    :return: Boolean - True if good
-    """
-    if not data_format or data_format.lower() not in ['csv', 'pandas']:
-        raise ValueError('{} is an unspecified data format. The two options are Csv and Pandas '
-                         '(Csv is default)\n'.format(data_format))
-
-
-def check_valid_dates(from_date, to_date):
-    """
-    Check if it's a valid date range
-
-    :param from_date: date should scrape from
-    :param to_date: date should scrape to
-
-    :return: None
-    """
-    try:
-        if time.strptime(to_date, "%Y-%m-%d") < time.strptime(from_date, "%Y-%m-%d"):
-            raise ValueError("Error: The second date input is earlier than the first one")
-    except ValueError:
-        raise ValueError("Error: Incorrect format given for dates. They must be given like 'yyyy-mm-dd' "
-                         "(ex: '2016-10-01').")
-
-
-def to_csv(base_file_name, df, league, file_type):
-    """
-    Write DataFrame to csv file
-
-    :param base_file_name: name of file
-    :param df: DataFrame
-    :param league: nhl or nwhl
-    :param file_type: type of file despoiting
-
-    :return: None
-    """
-    docs_dir = config.DOCS_DIR
-
-    # This was a late addition so we add support here
-    if isinstance(docs_dir, str) and not os.path.isdir(os.path.join(docs_dir, "csvs")):
-        os.mkdir(os.path.join(docs_dir, "csvs"))
-
-    if df is not None:
-        if isinstance(docs_dir, str):
-            file_name = os.path.join(docs_dir, "csvs", '{}_{}_{}.csv'.format(league, file_type, base_file_name))
-        else:
-            file_name = '{}_{}_{}.csv'.format(league, file_type, base_file_name)
-
-        print("---> {} {} data deposited in file - {}".format(league, file_type, file_name))
-        df.to_csv(file_name, sep=',', encoding='utf-8')
-
-import re
-from bs4 import BeautifulSoup, SoupStrainer
-
-## html_pbp.py ##
 def get_contents(game_html):
-    """
-    Uses Beautiful soup to parses the html document.
-    Some parsers work for some pages but don't work for others....I'm not sure why so I just try them all here in order
-
-    :param game_html: html doc
-
-    :return: "soupified" html
-    """
+    #Parse NHL HTML PBP document
     parsers = ["html5lib", "lxml", "html.parser"]
     strainer = SoupStrainer('td', attrs={'class': re.compile(r'bborder')})

@@ -409,16 +49,8 @@ def get_contents(game_html):

     return tds

-## html_shifts.py ##
 def get_soup(shifts_html):
-    """
-    Uses Beautiful soup to parses the html document.
-    Some parsers work for some pages but don't work for others....I'm not sure why so I just try them all here in order
-
-    :param shifts_html: html doc
-
-    :return: "soupified" html and player_shifts portion of html (it's a bunch of td tags)
-    """
+    #Convert html document to soup
     parsers = ["lxml", "html.parser", "html5lib"]

     for parser in parsers:
@@ -430,19 +62,12 @@ def get_soup(shifts_html):

     return td, get_teams(soup)

-
 def get_teams(soup):
-    """
-    Return the team for the TOI tables and the home team
-
-    :param soup: souped up html
-
-    :return: list with team and home team
-    """
+    #Find and return list of teams a given document's match (for HTML shifts parsing)
     team = soup.find('td', class_='teamHeading + border') # Team for shifts
     team = team.get_text()

-    #
+    #Find home team
     teams = soup.find_all('td', {'align': 'center', 'style': 'font-size: 10px;font-weight:bold'})
     regex = re.compile(r'>(.*)<br/?>')
     home_team = regex.findall(str(teams[7]))
wsba_hockey/tools/xg_model.py
CHANGED
@@ -108,7 +108,12 @@ def fix_players(pbp):
         pbp[f'add_player_{i+1}_name'] = np.where(pbp[f'event_player_{i+1}_name'].isna(),pbp[f'event_player_{i+1}_id'].astype(str).replace(names_dict),np.nan)
         pbp[f'event_player_{i+1}_name'] = pbp[f'event_player_{i+1}_name'].combine_first(pbp[f'add_player_{i+1}_name'])

-    pbp
+    #For the first three pbp seasons the event_goalie_id isn't included as a column
+    try:
+        pbp['event_goalie_name'] = pbp['event_goalie_id'].astype(str).replace(names_dict)
+    except KeyError:
+        pbp['event_goalie_id'] = np.where(pbp['event_team_venue']=='home',pbp['home_goalie_id'],pbp['away_goalie_id']).astype(str)
+        pbp['event_goalie_name'] = pbp['event_goalie_id'].astype(str).replace(names_dict)

     #Add hands
     pbp['event_player_1_hand'] = pbp['event_player_1_id'].astype(str).str.replace('.0','').replace(roster_dict)
wsba_hockey/wsba_main.py
CHANGED
@@ -34,7 +34,8 @@ SEASONS = [
     20212022,
     20222023,
     20232024,
-    20242025
+    20242025,
+    20252026
 ]

 CONVERT_SEASONS = {2007: 20072008,
@@ -54,7 +55,28 @@ CONVERT_SEASONS = {2007: 20072008,
                    2021: 20212022,
                    2022: 20222023,
                    2023: 20232024,
-                   2024: 20242025
+                   2024: 20242025,
+                   2025: 20252026}
+
+SEASON_NAMES = {20072008: '2007-08',
+                20082009: '2008-09',
+                20092010: '2009-10',
+                20102011: '2010-11',
+                20112012: '2011-12',
+                20122013: '2012-13',
+                20132014: '2013-14',
+                20142015: '2014-15',
+                20152016: '2015-16',
+                20162017: '2016-17',
+                20172018: '2017-18',
+                20182019: '2018-19',
+                20192020: '2019-20',
+                20202021: '2020-21',
+                20212022: '2021-22',
+                20222023: '2022-23',
+                20232024: '2023-24',
+                20242025: '2024-25',
+                20252025: '2025-26'}

 CONVERT_TEAM_ABBR = {'L.A':'LAK',
                      'N.J':'NJD',
@@ -72,14 +94,13 @@ KNOWN_PROBS = {
     2008020259:'HTML data is completely missing for this game.',
     2008020409:'HTML data is completely missing for this game.',
     2008021077:'HTML data is completely missing for this game.',
+    2008030311:'Missing shifts data for game between Pittsburgh and Carolina',
     2009020081:'HTML pbp for this game between Pittsburgh and Carolina is missing all but the period start and first faceoff events, for some reason.',
     2009020658:'Missing shifts data for game between New York Islanders and Dallas.',
     2009020885:'Missing shifts data for game between Sharks and Blue Jackets.',
     2010020124:'Game between Capitals and Hurricanes is sporadically missing player on-ice data',
     2012020018:'HTML events contain mislabeled events.',
-    2013020971:'On March 10th, 2014, Stars forward Rich Peverley suffered from a cardiac episode midgame and as a result, the remainder of the game was postponed. \nThe game resumed on April 9th, and the only goal scorer in the game, Blue Jackets forward Nathan Horton, did not appear in the resumed game due to injury. Interestingly, Horton would never play in the NHL again.',
     2018021133:'Game between Lightning and Capitals has incorrectly labeled event teams (i.e. WSH TAKEAWAY - #71 CIRELLI (Cirelli is a Tampa Bay skater in this game)).',
-    2019020876:'Due to the frightening collapse of Blues defensemen Jay Bouwmeester, a game on February 2nd, 2020 between the Ducks and Blues was postponed. \nWhen the game resumed, Ducks defensemen Hampus Lindholm, who assisted on a goal in the inital game, did not play in the resumed match.'
 }

 SHOT_TYPES = ['wrist','deflected','tip-in','slap','backhand','snap','wrap-around','poke','bat','cradle','between-legs']
@@ -99,7 +120,7 @@ def nhl_scrape_game(game_ids:list[int], split_shifts:bool = False, remove:list[s
     Given a set of game_ids (NHL API), return complete play-by-play information as requested.

     Args:
-        game_ids (List[int] or ['random', int, int, int]):
+        game_ids (int or List[int] or ['random', int, int, int]):
             List of NHL game IDs to scrape or use ['random', n, start_year, end_year] to fetch n random games.
         split_shifts (bool, optional):
             If True, returns a dict with separate 'pbp' and 'shifts' DataFrames. Default is False.
@@ -122,10 +143,13 @@ def nhl_scrape_game(game_ids:list[int], split_shifts:bool = False, remove:list[s
         - 'errors' (optional): list of game IDs that failed if errors=True
     """

+    #Wrap game_id in a list if only a single game_id is provided
+    game_ids = [game_ids] if type(game_ids) != list else game_ids
+
     pbps = []
     if game_ids[0] == 'random':
         #Randomize selection of game_ids
-        #Some ids returned may be invalid (for example,
+        #Some ids returned may be invalid (for example, 2020022000)
         num = game_ids[1]
         start = game_ids[2] if len(game_ids) > 1 else 2007
         end = game_ids[3] if len(game_ids) > 2 else (date.today().year)-1
@@ -190,7 +214,7 @@ def nhl_scrape_game(game_ids:list[int], split_shifts:bool = False, remove:list[s

             #Track error
             error_ids.append(game_id)
-
+
     #Add all pbps together
     if not pbps:
         print("\rNo data returned.")
@@ -1139,7 +1163,7 @@ def nhl_calculate_stats(pbp:pd.DataFrame, type:Literal['skater','goalie','team']
                                             'birthCountry':'Nationality'})

     #WSBA
-    complete['WSBA'] = complete['
+    complete['WSBA'] = complete['ID'].astype(str).str.replace('.0','')+complete['Team']+complete['Season'].astype(str)

     #Add player age
     complete['Birthday'] = pd.to_datetime(complete['Birthday'])
@@ -1149,6 +1173,9 @@ def nhl_calculate_stats(pbp:pd.DataFrame, type:Literal['skater','goalie','team']
     #Find player headshot
     complete['Headshot'] = 'https://assets.nhle.com/mugs/nhl/'+complete['Season'].astype(str)+'/'+complete['Team']+'/'+complete['ID'].astype(int).astype(str)+'.png'

+    #Convert season name
+    complete['Season'] = complete['Season'].replace(SEASON_NAMES)
+
     head = ['Goalie','ID','Game'] if 'Game' in complete.columns else ['Goalie','ID']
     complete = complete[head+[
         "Season","Team",'WSBA',
@@ -1191,6 +1218,9 @@ def nhl_calculate_stats(pbp:pd.DataFrame, type:Literal['skater','goalie','team']
         complete['xGF%'] = complete['xGF']/(complete['xGF']+complete['xGA'])
         complete['FF%'] = complete['FF']/(complete['FF']+complete['FA'])
         complete['CF%'] = complete['CF']/(complete['CF']+complete['CA'])
+
+        #Convert season name
+        complete['Season'] = complete['Season'].replace(SEASON_NAMES)

         head = ['Team','Game'] if 'Game' in complete.columns else ['Team']
         complete = complete[head+[
@@ -1274,7 +1304,7 @@ def nhl_calculate_stats(pbp:pd.DataFrame, type:Literal['skater','goalie','team']
         #Remove goalies that occasionally appear in a set
         complete = complete.loc[complete['Position']!='G']
         #Add WSBA ID
-        complete['WSBA'] = complete['
+        complete['WSBA'] = complete['ID'].astype(str).str.replace('.0','')+complete['Season'].astype(str)+complete['Team']

         #Add per 60 stats
         for stat in PER_SIXTY:
@@ -1286,6 +1316,9 @@ def nhl_calculate_stats(pbp:pd.DataFrame, type:Literal['skater','goalie','team']
         for stat in PER_SIXTY[:3]:
             type_metrics.append(f'{type.capitalize()}{stat}')

+        #Convert season name
+        complete['Season'] = complete['Season'].replace(SEASON_NAMES)
+
         head = ['Player','ID','Game'] if 'Game' in complete.columns else ['Player','ID']
         complete = complete[head+[
             "Season","Team",'WSBA',
@@ -1529,13 +1562,13 @@ class NHL_Database:
             The initialized play-by-play dataset.
         """

-        print('Initializing database...')
+        print(f'Initializing database "{name}"...')
         self.name = name

         if game_ids:
-            self.pbp = nhl_scrape_game(game_ids)
+            self.pbp = nhl_apply_xG(nhl_scrape_game(game_ids))
         else:
-            self.pbp = nhl_scrape_game(['random',3,
+            self.pbp = nhl_apply_xG(nhl_scrape_game(['random',3,2007,2024])) if pbp.empty else pbp

         self.games = self.pbp['game_id'].drop_duplicates().to_list()
         self.stats = {}
@@ -1555,7 +1588,7 @@ class NHL_Database:
         """

         print('Adding games...')
-        self.pbp = pd.concat([self.pbp,wsba.nhl_scrape_game(game_ids)])
+        self.pbp = pd.concat([self.pbp,nhl_apply_xG(wsba.nhl_scrape_game(game_ids))])

         return self.pbp

@@ -1650,7 +1683,7 @@ class NHL_Database:
             Root folder to export data into. Defaults to `self.name/`.
         """

-        print('Exporting data...')
+        print(f'Exporting data in database "{self.name}"...')
         start = time.perf_counter()

         # Use default path if none provided
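Taken together, the `wsba_main.py` changes register the 2025-26 season, let `nhl_scrape_game` accept a bare `int` game ID, report seasons as '2024-25'-style labels via `SEASON_NAMES` in `nhl_calculate_stats`, and have `NHL_Database` run `nhl_apply_xG` on freshly scraped games. A minimal usage sketch follows, assuming `nhl_scrape_game` is exposed at the package top level as the internal `wsba.nhl_scrape_game` call above suggests; the game IDs are illustrative only.

```python
# Usage sketch for the 1.2.1 behavior shown above; game IDs are illustrative.
import wsba_hockey as wsba

# A single int is now wrapped into a list internally, so no [ ... ] is required.
pbp_one = wsba.nhl_scrape_game(2024020001)

# Lists still work as before, as does the ['random', n, start_year, end_year] form.
pbp_many = wsba.nhl_scrape_game([2024020001, 2024020002])
```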
|