warn-scraper 1.2.86__py3-none-any.whl → 1.2.88__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- warn/scrapers/co.py +4 -0
- warn/scrapers/va.py +26 -14
- {warn_scraper-1.2.86.dist-info → warn_scraper-1.2.88.dist-info}/METADATA +1 -1
- {warn_scraper-1.2.86.dist-info → warn_scraper-1.2.88.dist-info}/RECORD +8 -8
- {warn_scraper-1.2.86.dist-info → warn_scraper-1.2.88.dist-info}/LICENSE +0 -0
- {warn_scraper-1.2.86.dist-info → warn_scraper-1.2.88.dist-info}/WHEEL +0 -0
- {warn_scraper-1.2.86.dist-info → warn_scraper-1.2.88.dist-info}/entry_points.txt +0 -0
- {warn_scraper-1.2.86.dist-info → warn_scraper-1.2.88.dist-info}/top_level.txt +0 -0
warn/scrapers/co.py
CHANGED
@@ -123,7 +123,11 @@ def scrape(
|
|
123
123
|
"Occupations Impacted": "occupations",
|
124
124
|
"Occupations": "occupations",
|
125
125
|
"Select the workforce area": "workforce_area",
|
126
|
+
"Total CO": "jobs",
|
127
|
+
"CO Layoffs": "jobs",
|
126
128
|
"Total number of permanent layoffs": "permanent_job_losses",
|
129
|
+
"# permanent": "permanent_job_losses",
|
130
|
+
"# Permanent": "permanent_job_losses",
|
127
131
|
"Total number of temporary layoffs": "temporary_job_losses",
|
128
132
|
"Total number of furloughs": "furloughs",
|
129
133
|
"Begin date of layoffs": "begin_date",
|
warn/scrapers/va.py
CHANGED
@@ -1,11 +1,12 @@
|
|
1
1
|
import logging
|
2
2
|
from pathlib import Path
|
3
3
|
|
4
|
-
from bs4 import BeautifulSoup, Tag
|
5
|
-
|
6
4
|
from .. import utils
|
7
5
|
from ..cache import Cache
|
8
6
|
|
7
|
+
# from bs4 import BeautifulSoup, Tag
|
8
|
+
|
9
|
+
|
9
10
|
__authors__ = ["zstumgoren", "Dilcia19", "shallotly"]
|
10
11
|
__tags__ = ["html", "csv"]
|
11
12
|
__source__ = {
|
@@ -29,26 +30,37 @@ def scrape(
|
|
29
30
|
|
30
31
|
Returns: the Path where the file is written
|
31
32
|
"""
|
33
|
+
# This scraper initially tried to get a CSV download link that was only for the most recent entries. The scraping part of that broke.
|
34
|
+
# It's now hard-coded to a particular download link with parameters that should get the full thing.
|
35
|
+
|
36
|
+
# This may break again, but this revised attempt has far fewer moving parts and actually fetches the complete data set.
|
37
|
+
# Blame Stucka in December 2024.
|
38
|
+
|
32
39
|
# Get the WARN page
|
33
|
-
url = "https://www.vec.virginia.gov/warn-notices"
|
34
|
-
|
35
|
-
|
40
|
+
# url = "https://www.vec.virginia.gov/warn-notices"
|
41
|
+
# url = "https://vec.virginia.gov/warn-notices?field_notice_date_value%5Bmin%5D%5Bdate%5D=1%2F1%2F1990&field_notice_date_value%5Bmax%5D%5Bdate%5D=&field_region_warn_tid=All"
|
42
|
+
# r = utils.get_url(url, verify=True)
|
43
|
+
# html = r.text
|
36
44
|
|
37
45
|
# Save it to the cache
|
38
46
|
cache = Cache(cache_dir)
|
39
|
-
cache.write("va/source.html", html)
|
47
|
+
# cache.write("va/source.html", html)
|
40
48
|
|
41
49
|
# Parse out the CSV download link
|
42
|
-
soup = BeautifulSoup(html, "html.parser")
|
43
|
-
csv_link = soup.find("a", text="Download")
|
44
|
-
if isinstance(csv_link, Tag):
|
45
|
-
csv_href = csv_link["href"]
46
|
-
else:
|
47
|
-
raise ValueError("Could not find CSV link")
48
|
-
|
50
|
+
# soup = BeautifulSoup(html, "html.parser")
|
51
|
+
# csv_link = soup.find("a", text="Download")
|
52
|
+
# if isinstance(csv_link, Tag):
|
53
|
+
# csv_href = csv_link["href"]
|
54
|
+
# else:
|
55
|
+
# raise ValueError("Could not find CSV link")
|
56
|
+
|
57
|
+
# csv_href = "/warn-notices-csv.csv?"
|
58
|
+
# csv_url = f"https://www.vec.virginia.gov{csv_href}"
|
59
|
+
|
60
|
+
csv_url = "https://vec.virginia.gov/warn-notices-csv.csv?field_notice_date_value%5Bmin%5D%5Bdate%5D=1%2F1%2F1990&field_notice_date_value%5Bmax%5D%5Bdate%5D=&field_region_warn_tid=All"
|
49
61
|
|
50
62
|
# Download it to the cache
|
51
|
-
cache.download("va/source.csv", csv_url, verify=False)
|
63
|
+
cache.download("va/source.csv", csv_url, verify=True)
|
52
64
|
|
53
65
|
# Open it up as a list of rows
|
54
66
|
csv_rows = cache.read_csv("va/source.csv")
|
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.1
|
2
2
|
Name: warn-scraper
|
3
|
-
Version: 1.2.86
|
3
|
+
Version: 1.2.88
|
4
4
|
Summary: Command-line interface for downloading WARN Act notices of qualified plant closings and mass layoffs from state government websites
|
5
5
|
Home-page: https://github.com/biglocalnews/warn-scraper
|
6
6
|
Author: Big Local News
|
@@ -29,7 +29,7 @@ warn/scrapers/ak.py,sha256=h7BYMTV0whwWAPhbzVDVKMMoVCFphKly70aiTHabPq4,1847
|
|
29
29
|
warn/scrapers/al.py,sha256=D0rT9GQ0vwfkRuveVAt-Po-T6b2TI1EPGeLOBy2m3_M,2240
|
30
30
|
warn/scrapers/az.py,sha256=elGbue01Gjf_DQ66Wy9qqGIOJsiY-KIKJOVeft8pCXg,1447
|
31
31
|
warn/scrapers/ca.py,sha256=VQOfjHXPCc-jYwh-EPGVVfnzvXB7pdmCt2uJ6QnMPRM,8600
|
32
|
-
warn/scrapers/co.py,sha256=
|
32
|
+
warn/scrapers/co.py,sha256=fgeZj8UVtK0mHA07pH1z_wNA3QRSPltwK-Htoby8uJo,7540
|
33
33
|
warn/scrapers/ct.py,sha256=HLMmBSFhT5Y3vZQUwRyCTxiG5BMQXTfG3SEj5rkQEL4,4771
|
34
34
|
warn/scrapers/dc.py,sha256=_sHLnVqK_W90QqJb_W88yDlgPjoMl63LYZP3CJfdN9g,4484
|
35
35
|
warn/scrapers/de.py,sha256=GyM92A-lFwZAfRxgbO-sIWhRfmBEKirzchaPIv-u0o4,1364
|
@@ -61,13 +61,13 @@ warn/scrapers/sd.py,sha256=_4R19Ybzsyx1PvcWV3_laJmJ3etrwVGfhNEQm6njwoA,1904
|
|
61
61
|
warn/scrapers/tn.py,sha256=i1H7c09Ea3CDrTXqqRMLBMPT_34QtGA0-x7T8rm_j5Q,2945
|
62
62
|
warn/scrapers/tx.py,sha256=watfR1gyN9w7nluiAOnnIghEmoq3eShNUzYSZ8SkZy4,4438
|
63
63
|
warn/scrapers/ut.py,sha256=iUh38YIjbvv5MyyKacsiZNe8KjfdBeDaOf-qMQEF_kc,2245
|
64
|
-
warn/scrapers/va.py,sha256
|
64
|
+
warn/scrapers/va.py,sha256=-QRIMPVIhBGDiKQOaMwwZbPtJxd1S2QwYX4Zxq1NNt0,2549
|
65
65
|
warn/scrapers/vt.py,sha256=d-bo4WK2hkrk4BhCCmLpEovcoZltlvdIUB6O0uaMx5A,1186
|
66
66
|
warn/scrapers/wa.py,sha256=UXdVtHZo_a-XfoiyOooTRfTb9W3PErSZdKca6SRORgs,4282
|
67
67
|
warn/scrapers/wi.py,sha256=ClEzXkwZbop0W4fkQgsb5oHAPUrb4luUPGV-jOKwkcg,4855
|
68
|
-
warn_scraper-1.2.
|
69
|
-
warn_scraper-1.2.
|
70
|
-
warn_scraper-1.2.
|
71
|
-
warn_scraper-1.2.
|
72
|
-
warn_scraper-1.2.
|
73
|
-
warn_scraper-1.2.
|
68
|
+
warn_scraper-1.2.88.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
|
69
|
+
warn_scraper-1.2.88.dist-info/METADATA,sha256=-30KqUCkeCjeAa13tyECWG9PCHBxQhSPdbbhQ1rZGF4,2036
|
70
|
+
warn_scraper-1.2.88.dist-info/WHEEL,sha256=Wyh-_nZ0DJYolHNn1_hMa4lM7uDedD_RGVwbmTjyItk,91
|
71
|
+
warn_scraper-1.2.88.dist-info/entry_points.txt,sha256=poh_oSweObGlBSs1_2qZmnTodlOYD0KfO7-h7W2UQIw,47
|
72
|
+
warn_scraper-1.2.88.dist-info/top_level.txt,sha256=gOhHgNEkrUvajlzoKkVOo-TlQht9MoXnKOErjzqLGHo,11
|
73
|
+
warn_scraper-1.2.88.dist-info/RECORD,,
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|