warn-scraper 1.2.97__py3-none-any.whl → 1.2.99__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- warn/scrapers/va.py +24 -9
- {warn_scraper-1.2.97.dist-info → warn_scraper-1.2.99.dist-info}/METADATA +2 -1
- {warn_scraper-1.2.97.dist-info → warn_scraper-1.2.99.dist-info}/RECORD +7 -7
- {warn_scraper-1.2.97.dist-info → warn_scraper-1.2.99.dist-info}/LICENSE +0 -0
- {warn_scraper-1.2.97.dist-info → warn_scraper-1.2.99.dist-info}/WHEEL +0 -0
- {warn_scraper-1.2.97.dist-info → warn_scraper-1.2.99.dist-info}/entry_points.txt +0 -0
- {warn_scraper-1.2.97.dist-info → warn_scraper-1.2.99.dist-info}/top_level.txt +0 -0
warn/scrapers/va.py
CHANGED
@@ -1,6 +1,7 @@
|
|
1
1
|
import datetime
|
2
2
|
import logging
|
3
3
|
import os
|
4
|
+
import platform
|
4
5
|
from glob import glob
|
5
6
|
from pathlib import Path
|
6
7
|
from shutil import copyfile
|
@@ -23,6 +24,16 @@ __source__ = {
|
|
23
24
|
|
24
25
|
logger = logging.getLogger(__name__)
|
25
26
|
|
27
|
+
if platform.system() == "Windows":
|
28
|
+
message = "This scraper requires Xvfb, which does not appear to be "
|
29
|
+
message += "supported within Windows, even with WSL. This scraper "
|
30
|
+
message += "will not work for you."
|
31
|
+
logger.error(message)
|
32
|
+
quit()
|
33
|
+
else:
|
34
|
+
print(f"{platform.system} found")
|
35
|
+
from xvfbwrapper import Xvfb
|
36
|
+
|
26
37
|
|
27
38
|
def scrape(
|
28
39
|
data_dir: Path = utils.WARN_DATA_DIR,
|
@@ -38,7 +49,8 @@ def scrape(
|
|
38
49
|
Returns: the Path where the file is written
|
39
50
|
"""
|
40
51
|
cache = Cache(cache_dir)
|
41
|
-
csv_url = "https://vec.virginia.gov/warn-notices-csv.csv"
|
52
|
+
# csv_url = "https://vec.virginia.gov/warn-notices-csv.csv"
|
53
|
+
csv_url = "https://vec.virginia.gov/warn_notices.csv"
|
42
54
|
|
43
55
|
"""
|
44
56
|
This scraper originally tried to parse HTML to find a CSV download link.
|
@@ -127,12 +139,17 @@ def scrape(
|
|
127
139
|
"THIRD_PARTY_NOTICES.chromedriver", "chromedriver.exe"
|
128
140
|
)
|
129
141
|
logger.debug(f"Chrome install variable is {chrome_install}")
|
130
|
-
service = ChromeService(chrome_install, service_args=["--verbose"])
|
131
|
-
driver = webdriver.Chrome(options=chromeoptionsholder, service=service)
|
132
|
-
logger.debug(f"Attempting to fetch {csv_url}")
|
133
|
-
driver.get(csv_url)
|
134
142
|
|
135
|
-
|
143
|
+
# Launch X Windows emulator, then launch Chrome to run with it
|
144
|
+
with Xvfb() as xvfb: # noqa: F841
|
145
|
+
service = ChromeService(chrome_install, service_args=["--verbose"])
|
146
|
+
driver = webdriver.Chrome(options=chromeoptionsholder, service=service)
|
147
|
+
logger.debug(f"Attempting to fetch {csv_url}")
|
148
|
+
driver.get(csv_url)
|
149
|
+
sleep(45) # Give it plenty of time to evaluate Javascript
|
150
|
+
driver.get(csv_url)
|
151
|
+
sleep(10)
|
152
|
+
driver.quit()
|
136
153
|
|
137
154
|
download_dir = os.path.expanduser("~") + "/Downloads"
|
138
155
|
|
@@ -140,7 +157,7 @@ def scrape(
|
|
140
157
|
logger.error(f"The download directory is not {download_dir}.")
|
141
158
|
|
142
159
|
# get the list of files
|
143
|
-
list_of_files = glob(download_dir + "/warn-notices-csv*.csv")
|
160
|
+
list_of_files = glob(download_dir + "/warn_notices*.csv")
|
144
161
|
if len(list_of_files) == 0:
|
145
162
|
logger.error(f"No matching files found in {download_dir}.")
|
146
163
|
|
@@ -159,8 +176,6 @@ def scrape(
|
|
159
176
|
|
160
177
|
copyfile(latest_file, target_filename)
|
161
178
|
|
162
|
-
driver.quit()
|
163
|
-
|
164
179
|
# Download it to the cache
|
165
180
|
# cache.download("va/source.csv", csv_url, verify=True)
|
166
181
|
|
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.1
|
2
2
|
Name: warn-scraper
|
3
|
-
Version: 1.2.97
|
3
|
+
Version: 1.2.99
|
4
4
|
Summary: Command-line interface for downloading WARN Act notices of qualified plant closings and mass layoffs from state government websites
|
5
5
|
Home-page: https://github.com/biglocalnews/warn-scraper
|
6
6
|
Author: Big Local News
|
@@ -33,6 +33,7 @@ Requires-Dist: retry
|
|
33
33
|
Requires-Dist: selenium
|
34
34
|
Requires-Dist: tenacity
|
35
35
|
Requires-Dist: xlrd
|
36
|
+
Requires-Dist: xvfbwrapper
|
36
37
|
Requires-Dist: webdriver-manager
|
37
38
|
|
38
39
|
## Links
|
@@ -61,13 +61,13 @@ warn/scrapers/sd.py,sha256=_4R19Ybzsyx1PvcWV3_laJmJ3etrwVGfhNEQm6njwoA,1904
|
|
61
61
|
warn/scrapers/tn.py,sha256=i1H7c09Ea3CDrTXqqRMLBMPT_34QtGA0-x7T8rm_j5Q,2945
|
62
62
|
warn/scrapers/tx.py,sha256=watfR1gyN9w7nluiAOnnIghEmoq3eShNUzYSZ8SkZy4,4438
|
63
63
|
warn/scrapers/ut.py,sha256=iUh38YIjbvv5MyyKacsiZNe8KjfdBeDaOf-qMQEF_kc,2245
|
64
|
-
warn/scrapers/va.py,sha256=
|
64
|
+
warn/scrapers/va.py,sha256=AXcj3VpNfprhwVqVyc7hYzLamWtsf1_yOvpXSitpZeM,8389
|
65
65
|
warn/scrapers/vt.py,sha256=d-bo4WK2hkrk4BhCCmLpEovcoZltlvdIUB6O0uaMx5A,1186
|
66
66
|
warn/scrapers/wa.py,sha256=UXdVtHZo_a-XfoiyOooTRfTb9W3PErSZdKca6SRORgs,4282
|
67
67
|
warn/scrapers/wi.py,sha256=ClEzXkwZbop0W4fkQgsb5oHAPUrb4luUPGV-jOKwkcg,4855
|
68
|
-
warn_scraper-1.2.97.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
|
69
|
-
warn_scraper-1.2.
|
70
|
-
warn_scraper-1.2.97.dist-info/WHEEL,sha256=Wyh-_nZ0DJYolHNn1_hMa4lM7uDedD_RGVwbmTjyItk,91
|
71
|
-
warn_scraper-1.2.97.dist-info/entry_points.txt,sha256=poh_oSweObGlBSs1_2qZmnTodlOYD0KfO7-h7W2UQIw,47
|
72
|
-
warn_scraper-1.2.97.dist-info/top_level.txt,sha256=gOhHgNEkrUvajlzoKkVOo-TlQht9MoXnKOErjzqLGHo,11
|
73
|
-
warn_scraper-1.2.97.dist-info/RECORD,,
|
68
|
+
warn_scraper-1.2.99.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
|
69
|
+
warn_scraper-1.2.99.dist-info/METADATA,sha256=UQ6xOoUHiV0l3bZONrApFK92uoWhZnoVbSvTb4QXVlg,2145
|
70
|
+
warn_scraper-1.2.99.dist-info/WHEEL,sha256=Wyh-_nZ0DJYolHNn1_hMa4lM7uDedD_RGVwbmTjyItk,91
|
71
|
+
warn_scraper-1.2.99.dist-info/entry_points.txt,sha256=poh_oSweObGlBSs1_2qZmnTodlOYD0KfO7-h7W2UQIw,47
|
72
|
+
warn_scraper-1.2.99.dist-info/top_level.txt,sha256=gOhHgNEkrUvajlzoKkVOo-TlQht9MoXnKOErjzqLGHo,11
|
73
|
+
warn_scraper-1.2.99.dist-info/RECORD,,
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|