warn-scraper 1.2.73__tar.gz → 1.2.75__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {warn-scraper-1.2.73/warn_scraper.egg-info → warn-scraper-1.2.75}/PKG-INFO +1 -1
- {warn-scraper-1.2.73 → warn-scraper-1.2.75}/warn/scrapers/ca.py +19 -14
- {warn-scraper-1.2.73 → warn-scraper-1.2.75}/warn/scrapers/id.py +6 -8
- {warn-scraper-1.2.73 → warn-scraper-1.2.75/warn_scraper.egg-info}/PKG-INFO +1 -1
- {warn-scraper-1.2.73 → warn-scraper-1.2.75}/.devcontainer/devcontainer.json +0 -0
- {warn-scraper-1.2.73 → warn-scraper-1.2.75}/.github/dependabot.yml.disabled +0 -0
- {warn-scraper-1.2.73 → warn-scraper-1.2.75}/.github/workflows/continuous-deployment.yml +0 -0
- {warn-scraper-1.2.73 → warn-scraper-1.2.75}/.gitignore +0 -0
- {warn-scraper-1.2.73 → warn-scraper-1.2.75}/.pre-commit-config.yaml +0 -0
- {warn-scraper-1.2.73 → warn-scraper-1.2.75}/LICENSE +0 -0
- {warn-scraper-1.2.73 → warn-scraper-1.2.75}/MANIFEST.in +0 -0
- {warn-scraper-1.2.73 → warn-scraper-1.2.75}/Makefile +0 -0
- {warn-scraper-1.2.73 → warn-scraper-1.2.75}/Pipfile +0 -0
- {warn-scraper-1.2.73 → warn-scraper-1.2.75}/Pipfile.lock +0 -0
- {warn-scraper-1.2.73 → warn-scraper-1.2.75}/README.md +0 -0
- {warn-scraper-1.2.73 → warn-scraper-1.2.75}/docs/Makefile +0 -0
- {warn-scraper-1.2.73 → warn-scraper-1.2.75}/docs/_static/R42693.pdf +0 -0
- {warn-scraper-1.2.73 → warn-scraper-1.2.75}/docs/_static/gao-03-1003.pdf +0 -0
- {warn-scraper-1.2.73 → warn-scraper-1.2.75}/docs/_static/releasing-actions-finished.png +0 -0
- {warn-scraper-1.2.73 → warn-scraper-1.2.75}/docs/_static/releasing-actions-start.png +0 -0
- {warn-scraper-1.2.73 → warn-scraper-1.2.75}/docs/_static/releasing-changelog-button.png +0 -0
- {warn-scraper-1.2.73 → warn-scraper-1.2.75}/docs/_static/releasing-changelog-entered.png +0 -0
- {warn-scraper-1.2.73 → warn-scraper-1.2.75}/docs/_static/releasing-draft-button.png +0 -0
- {warn-scraper-1.2.73 → warn-scraper-1.2.75}/docs/_static/releasing-name-release.png +0 -0
- {warn-scraper-1.2.73 → warn-scraper-1.2.75}/docs/_static/releasing-name-tag.png +0 -0
- {warn-scraper-1.2.73 → warn-scraper-1.2.75}/docs/_static/releasing-publish-button.png +0 -0
- {warn-scraper-1.2.73 → warn-scraper-1.2.75}/docs/_static/releasing-pypi.png +0 -0
- {warn-scraper-1.2.73 → warn-scraper-1.2.75}/docs/_static/releasing-release-published.png +0 -0
- {warn-scraper-1.2.73 → warn-scraper-1.2.75}/docs/_static/releasing-releases-button.png +0 -0
- {warn-scraper-1.2.73 → warn-scraper-1.2.75}/docs/_static/releasing-tag-button.png +0 -0
- {warn-scraper-1.2.73 → warn-scraper-1.2.75}/docs/_templates/sources.md.tmpl +0 -0
- {warn-scraper-1.2.73 → warn-scraper-1.2.75}/docs/conf.py +0 -0
- {warn-scraper-1.2.73 → warn-scraper-1.2.75}/docs/contributing.rst +0 -0
- {warn-scraper-1.2.73 → warn-scraper-1.2.75}/docs/index.rst +0 -0
- {warn-scraper-1.2.73 → warn-scraper-1.2.75}/docs/make.bat +0 -0
- {warn-scraper-1.2.73 → warn-scraper-1.2.75}/docs/reference.rst +0 -0
- {warn-scraper-1.2.73 → warn-scraper-1.2.75}/docs/releasing.md +0 -0
- {warn-scraper-1.2.73 → warn-scraper-1.2.75}/docs/requirements.txt +0 -0
- {warn-scraper-1.2.73 → warn-scraper-1.2.75}/docs/scrapers/al.md +0 -0
- {warn-scraper-1.2.73 → warn-scraper-1.2.75}/docs/scrapers/az.md +0 -0
- {warn-scraper-1.2.73 → warn-scraper-1.2.75}/docs/scrapers/ca.md +0 -0
- {warn-scraper-1.2.73 → warn-scraper-1.2.75}/docs/scrapers/co.md +0 -0
- {warn-scraper-1.2.73 → warn-scraper-1.2.75}/docs/scrapers/dc.md +0 -0
- {warn-scraper-1.2.73 → warn-scraper-1.2.75}/docs/scrapers/de.md +0 -0
- {warn-scraper-1.2.73 → warn-scraper-1.2.75}/docs/scrapers/ia.md +0 -0
- {warn-scraper-1.2.73 → warn-scraper-1.2.75}/docs/scrapers/in.md +0 -0
- {warn-scraper-1.2.73 → warn-scraper-1.2.75}/docs/scrapers/job_center.md +0 -0
- {warn-scraper-1.2.73 → warn-scraper-1.2.75}/docs/scrapers/ks.md +0 -0
- {warn-scraper-1.2.73 → warn-scraper-1.2.75}/docs/scrapers/md.md +0 -0
- {warn-scraper-1.2.73 → warn-scraper-1.2.75}/docs/scrapers/me.md +0 -0
- {warn-scraper-1.2.73 → warn-scraper-1.2.75}/docs/scrapers/mo.md +0 -0
- {warn-scraper-1.2.73 → warn-scraper-1.2.75}/docs/scrapers/ny.md +0 -0
- {warn-scraper-1.2.73 → warn-scraper-1.2.75}/docs/scrapers/ok.md +0 -0
- {warn-scraper-1.2.73 → warn-scraper-1.2.75}/docs/scrapers/or.md +0 -0
- {warn-scraper-1.2.73 → warn-scraper-1.2.75}/docs/scrapers/sc.md +0 -0
- {warn-scraper-1.2.73 → warn-scraper-1.2.75}/docs/scrapers/tx.md +0 -0
- {warn-scraper-1.2.73 → warn-scraper-1.2.75}/docs/scrapers/ut.md +0 -0
- {warn-scraper-1.2.73 → warn-scraper-1.2.75}/docs/scrapers/va.md +0 -0
- {warn-scraper-1.2.73 → warn-scraper-1.2.75}/docs/scrapers/vt.md +0 -0
- {warn-scraper-1.2.73 → warn-scraper-1.2.75}/docs/scrapers/wi.md +0 -0
- {warn-scraper-1.2.73 → warn-scraper-1.2.75}/docs/sources.md +0 -0
- {warn-scraper-1.2.73 → warn-scraper-1.2.75}/docs/usage.md +0 -0
- {warn-scraper-1.2.73 → warn-scraper-1.2.75}/setup.cfg +0 -0
- {warn-scraper-1.2.73 → warn-scraper-1.2.75}/setup.py +0 -0
- {warn-scraper-1.2.73 → warn-scraper-1.2.75}/tests/__init__.py +0 -0
- {warn-scraper-1.2.73 → warn-scraper-1.2.75}/tests/cassettes/test_cached_detail_pages.yaml +0 -0
- {warn-scraper-1.2.73 → warn-scraper-1.2.75}/tests/cassettes/test_cached_search_results.yaml +0 -0
- {warn-scraper-1.2.73 → warn-scraper-1.2.75}/tests/cassettes/test_missing_detail_page_values.yaml +0 -0
- {warn-scraper-1.2.73 → warn-scraper-1.2.75}/tests/cassettes/test_no_results.yaml +0 -0
- {warn-scraper-1.2.73 → warn-scraper-1.2.75}/tests/cassettes/test_paged_results.yaml +0 -0
- {warn-scraper-1.2.73 → warn-scraper-1.2.75}/tests/cassettes/test_scrape_integration.yaml +0 -0
- {warn-scraper-1.2.73 → warn-scraper-1.2.75}/tests/conftest.py +0 -0
- {warn-scraper-1.2.73 → warn-scraper-1.2.75}/tests/fixtures/2021_page_1.html +0 -0
- {warn-scraper-1.2.73 → warn-scraper-1.2.75}/tests/fixtures/2021_page_2.html +0 -0
- {warn-scraper-1.2.73 → warn-scraper-1.2.75}/tests/test_cache.py +0 -0
- {warn-scraper-1.2.73 → warn-scraper-1.2.75}/tests/test_delete.py +0 -0
- {warn-scraper-1.2.73 → warn-scraper-1.2.75}/tests/test_job_center.py +0 -0
- {warn-scraper-1.2.73 → warn-scraper-1.2.75}/tests/test_job_center_cache.py +0 -0
- {warn-scraper-1.2.73 → warn-scraper-1.2.75}/tests/test_openpyxl.py +0 -0
- {warn-scraper-1.2.73 → warn-scraper-1.2.75}/warn/__init__.py +0 -0
- {warn-scraper-1.2.73 → warn-scraper-1.2.75}/warn/cache.py +0 -0
- {warn-scraper-1.2.73 → warn-scraper-1.2.75}/warn/cli.py +0 -0
- {warn-scraper-1.2.73 → warn-scraper-1.2.75}/warn/platforms/__init__.py +0 -0
- {warn-scraper-1.2.73 → warn-scraper-1.2.75}/warn/platforms/job_center/__init__.py +0 -0
- {warn-scraper-1.2.73 → warn-scraper-1.2.75}/warn/platforms/job_center/cache.py +0 -0
- {warn-scraper-1.2.73 → warn-scraper-1.2.75}/warn/platforms/job_center/site.py +0 -0
- {warn-scraper-1.2.73 → warn-scraper-1.2.75}/warn/platforms/job_center/urls.py +0 -0
- {warn-scraper-1.2.73 → warn-scraper-1.2.75}/warn/platforms/job_center/utils.py +0 -0
- {warn-scraper-1.2.73 → warn-scraper-1.2.75}/warn/runner.py +0 -0
- {warn-scraper-1.2.73 → warn-scraper-1.2.75}/warn/scrapers/__init__.py +0 -0
- {warn-scraper-1.2.73 → warn-scraper-1.2.75}/warn/scrapers/ak.py +0 -0
- {warn-scraper-1.2.73 → warn-scraper-1.2.75}/warn/scrapers/al.py +0 -0
- {warn-scraper-1.2.73 → warn-scraper-1.2.75}/warn/scrapers/az.py +0 -0
- {warn-scraper-1.2.73 → warn-scraper-1.2.75}/warn/scrapers/co.py +0 -0
- {warn-scraper-1.2.73 → warn-scraper-1.2.75}/warn/scrapers/ct.py +0 -0
- {warn-scraper-1.2.73 → warn-scraper-1.2.75}/warn/scrapers/dc.py +0 -0
- {warn-scraper-1.2.73 → warn-scraper-1.2.75}/warn/scrapers/de.py +0 -0
- {warn-scraper-1.2.73 → warn-scraper-1.2.75}/warn/scrapers/fl.py +0 -0
- {warn-scraper-1.2.73 → warn-scraper-1.2.75}/warn/scrapers/ga.py +0 -0
- {warn-scraper-1.2.73 → warn-scraper-1.2.75}/warn/scrapers/hi.py +0 -0
- {warn-scraper-1.2.73 → warn-scraper-1.2.75}/warn/scrapers/ia.py +0 -0
- {warn-scraper-1.2.73 → warn-scraper-1.2.75}/warn/scrapers/il.py +0 -0
- {warn-scraper-1.2.73 → warn-scraper-1.2.75}/warn/scrapers/in.py +0 -0
- {warn-scraper-1.2.73 → warn-scraper-1.2.75}/warn/scrapers/ks.py +0 -0
- {warn-scraper-1.2.73 → warn-scraper-1.2.75}/warn/scrapers/ky.py +0 -0
- {warn-scraper-1.2.73 → warn-scraper-1.2.75}/warn/scrapers/la.py +0 -0
- {warn-scraper-1.2.73 → warn-scraper-1.2.75}/warn/scrapers/md.py +0 -0
- {warn-scraper-1.2.73 → warn-scraper-1.2.75}/warn/scrapers/me.py +0 -0
- {warn-scraper-1.2.73 → warn-scraper-1.2.75}/warn/scrapers/mi.py +0 -0
- {warn-scraper-1.2.73 → warn-scraper-1.2.75}/warn/scrapers/mo.py +0 -0
- {warn-scraper-1.2.73 → warn-scraper-1.2.75}/warn/scrapers/mt.py +0 -0
- {warn-scraper-1.2.73 → warn-scraper-1.2.75}/warn/scrapers/ne.py +0 -0
- {warn-scraper-1.2.73 → warn-scraper-1.2.75}/warn/scrapers/nj.py +0 -0
- {warn-scraper-1.2.73 → warn-scraper-1.2.75}/warn/scrapers/nm.py +0 -0
- {warn-scraper-1.2.73 → warn-scraper-1.2.75}/warn/scrapers/ny.py +0 -0
- {warn-scraper-1.2.73 → warn-scraper-1.2.75}/warn/scrapers/oh.py +0 -0
- {warn-scraper-1.2.73 → warn-scraper-1.2.75}/warn/scrapers/ok.py +0 -0
- {warn-scraper-1.2.73 → warn-scraper-1.2.75}/warn/scrapers/or.py +0 -0
- {warn-scraper-1.2.73 → warn-scraper-1.2.75}/warn/scrapers/ri.py +0 -0
- {warn-scraper-1.2.73 → warn-scraper-1.2.75}/warn/scrapers/sc.py +0 -0
- {warn-scraper-1.2.73 → warn-scraper-1.2.75}/warn/scrapers/sd.py +0 -0
- {warn-scraper-1.2.73 → warn-scraper-1.2.75}/warn/scrapers/tn.py +0 -0
- {warn-scraper-1.2.73 → warn-scraper-1.2.75}/warn/scrapers/tx.py +0 -0
- {warn-scraper-1.2.73 → warn-scraper-1.2.75}/warn/scrapers/ut.py +0 -0
- {warn-scraper-1.2.73 → warn-scraper-1.2.75}/warn/scrapers/va.py +0 -0
- {warn-scraper-1.2.73 → warn-scraper-1.2.75}/warn/scrapers/vt.py +0 -0
- {warn-scraper-1.2.73 → warn-scraper-1.2.75}/warn/scrapers/wa.py +0 -0
- {warn-scraper-1.2.73 → warn-scraper-1.2.75}/warn/scrapers/wi.py +0 -0
- {warn-scraper-1.2.73 → warn-scraper-1.2.75}/warn/utils.py +0 -0
- {warn-scraper-1.2.73 → warn-scraper-1.2.75}/warn_scraper.egg-info/SOURCES.txt +0 -0
- {warn-scraper-1.2.73 → warn-scraper-1.2.75}/warn_scraper.egg-info/dependency_links.txt +0 -0
- {warn-scraper-1.2.73 → warn-scraper-1.2.75}/warn_scraper.egg-info/entry_points.txt +0 -0
- {warn-scraper-1.2.73 → warn-scraper-1.2.75}/warn_scraper.egg-info/not-zip-safe +0 -0
- {warn-scraper-1.2.73 → warn-scraper-1.2.75}/warn_scraper.egg-info/requires.txt +0 -0
- {warn-scraper-1.2.73 → warn-scraper-1.2.75}/warn_scraper.egg-info/top_level.txt +0 -0
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.1
|
2
2
|
Name: warn-scraper
|
3
|
-
Version: 1.2.73
|
3
|
+
Version: 1.2.75
|
4
4
|
Summary: Command-line interface for downloading WARN Act notices of qualified plant closings and mass layoffs from state government websites
|
5
5
|
Home-page: https://github.com/biglocalnews/warn-scraper
|
6
6
|
Author: Big Local News
|
@@ -224,20 +224,25 @@ def _extract_pdf_data(pdf_path):
|
|
224
224
|
if "summary" in first_cell:
|
225
225
|
continue
|
226
226
|
for row in rows:
|
227
|
-
|
228
|
-
|
229
|
-
|
230
|
-
|
231
|
-
|
232
|
-
|
233
|
-
|
234
|
-
|
235
|
-
|
236
|
-
|
237
|
-
|
238
|
-
|
239
|
-
|
240
|
-
|
227
|
+
# Summary rows have an extra field, and the above code does not
|
228
|
+
# block the summary table from being parsed if it jumps onto another page.
|
229
|
+
if len(row) != len(raw_header) + 1:
|
230
|
+
data_row = {}
|
231
|
+
for i, value in enumerate(row):
|
232
|
+
this_raw_header = raw_header[i]
|
233
|
+
this_clean_header = header_crosswalk[this_raw_header]
|
234
|
+
data_row[this_clean_header] = value
|
235
|
+
# Data clean-ups
|
236
|
+
data_row.update(
|
237
|
+
{
|
238
|
+
"effective_date": data_row["effective_date"].replace(
|
239
|
+
" ", ""
|
240
|
+
),
|
241
|
+
"received_date": data_row["received_date"].replace(" ", ""),
|
242
|
+
"source_file": str(pdf_path).split("/")[-1],
|
243
|
+
}
|
244
|
+
)
|
245
|
+
data.append(data_row)
|
241
246
|
return data
|
242
247
|
|
243
248
|
|
@@ -12,7 +12,7 @@ __authors__ = ["chriszs", "stucka"]
|
|
12
12
|
__tags__ = ["pdf"]
|
13
13
|
__source__ = {
|
14
14
|
"name": "Idaho Department of Labor",
|
15
|
-
"url": "https://www.labor.idaho.gov/
|
15
|
+
"url": "https://www.labor.idaho.gov/warnnotice/",
|
16
16
|
}
|
17
17
|
|
18
18
|
logger = logging.getLogger(__name__)
|
@@ -32,7 +32,7 @@ def scrape(
|
|
32
32
|
Returns: the Path where the file is written
|
33
33
|
"""
|
34
34
|
# Create the URL of the source PDF
|
35
|
-
base_url = "https://www.labor.idaho.gov/
|
35
|
+
base_url = "https://www.labor.idaho.gov/warnnotice/"
|
36
36
|
file_name = "WARNNotice.pdf"
|
37
37
|
# There's a numeric parameter called v on this PDF URL that updates
|
38
38
|
# from time to time. Suspect this is a cache-buster. We're using a
|
@@ -40,10 +40,8 @@ def scrape(
|
|
40
40
|
min_cache_buster = 0
|
41
41
|
max_cache_buster = 10000000000
|
42
42
|
cache_buster = random.randrange(min_cache_buster, max_cache_buster)
|
43
|
-
url = f"{base_url}
|
43
|
+
url = f"{base_url}?v={cache_buster}"
|
44
44
|
|
45
|
-
# Download the PDF with verify=False because
|
46
|
-
# there's a persistent cert error we're working around.
|
47
45
|
cache = Cache(cache_dir)
|
48
46
|
state_code = "id"
|
49
47
|
cache_key = f"{state_code}/{file_name}"
|
@@ -126,9 +124,9 @@ def filter_garbage_rows(incoming: list):
|
|
126
124
|
badrows += 1
|
127
125
|
if badrows == 0:
|
128
126
|
logger.debug("No bad rows found.")
|
129
|
-
|
130
|
-
|
131
|
-
|
127
|
+
logger.debug(
|
128
|
+
f"!!!!! {badrows:,} bad rows dropped from the data set with insufficient number of fields."
|
129
|
+
)
|
132
130
|
return outgoing
|
133
131
|
|
134
132
|
|
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.1
|
2
2
|
Name: warn-scraper
|
3
|
-
Version: 1.2.73
|
3
|
+
Version: 1.2.75
|
4
4
|
Summary: Command-line interface for downloading WARN Act notices of qualified plant closings and mass layoffs from state government websites
|
5
5
|
Home-page: https://github.com/biglocalnews/warn-scraper
|
6
6
|
Author: Big Local News
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
{warn-scraper-1.2.73 → warn-scraper-1.2.75}/tests/cassettes/test_missing_detail_page_values.yaml
RENAMED
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|