warn-scraper 1.2.102__tar.gz → 1.2.103__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {warn_scraper-1.2.102/warn_scraper.egg-info → warn_scraper-1.2.103}/PKG-INFO +1 -1
- warn_scraper-1.2.103/warn/scrapers/ny.py +65 -0
- {warn_scraper-1.2.102 → warn_scraper-1.2.103/warn_scraper.egg-info}/PKG-INFO +1 -1
- warn_scraper-1.2.102/warn/scrapers/ny.py +0 -130
- {warn_scraper-1.2.102 → warn_scraper-1.2.103}/.devcontainer/devcontainer.json +0 -0
- {warn_scraper-1.2.102 → warn_scraper-1.2.103}/.github/dependabot.yml.disabled-for-sanity +0 -0
- {warn_scraper-1.2.102 → warn_scraper-1.2.103}/.github/workflows/continuous-deployment.yml +0 -0
- {warn_scraper-1.2.102 → warn_scraper-1.2.103}/.github/workflows/continuous-deployment.yml.broken-tests +0 -0
- {warn_scraper-1.2.102 → warn_scraper-1.2.103}/.gitignore +0 -0
- {warn_scraper-1.2.102 → warn_scraper-1.2.103}/.pre-commit-config.yaml +0 -0
- {warn_scraper-1.2.102 → warn_scraper-1.2.103}/LICENSE +0 -0
- {warn_scraper-1.2.102 → warn_scraper-1.2.103}/MANIFEST.in +0 -0
- {warn_scraper-1.2.102 → warn_scraper-1.2.103}/Makefile +0 -0
- {warn_scraper-1.2.102 → warn_scraper-1.2.103}/Pipfile +0 -0
- {warn_scraper-1.2.102 → warn_scraper-1.2.103}/Pipfile.lock +0 -0
- {warn_scraper-1.2.102 → warn_scraper-1.2.103}/README.md +0 -0
- {warn_scraper-1.2.102 → warn_scraper-1.2.103}/docs/Makefile +0 -0
- {warn_scraper-1.2.102 → warn_scraper-1.2.103}/docs/_static/R42693.pdf +0 -0
- {warn_scraper-1.2.102 → warn_scraper-1.2.103}/docs/_static/gao-03-1003.pdf +0 -0
- {warn_scraper-1.2.102 → warn_scraper-1.2.103}/docs/_static/releasing-actions-finished.png +0 -0
- {warn_scraper-1.2.102 → warn_scraper-1.2.103}/docs/_static/releasing-actions-start.png +0 -0
- {warn_scraper-1.2.102 → warn_scraper-1.2.103}/docs/_static/releasing-changelog-button.png +0 -0
- {warn_scraper-1.2.102 → warn_scraper-1.2.103}/docs/_static/releasing-changelog-entered.png +0 -0
- {warn_scraper-1.2.102 → warn_scraper-1.2.103}/docs/_static/releasing-draft-button.png +0 -0
- {warn_scraper-1.2.102 → warn_scraper-1.2.103}/docs/_static/releasing-name-release.png +0 -0
- {warn_scraper-1.2.102 → warn_scraper-1.2.103}/docs/_static/releasing-name-tag.png +0 -0
- {warn_scraper-1.2.102 → warn_scraper-1.2.103}/docs/_static/releasing-publish-button.png +0 -0
- {warn_scraper-1.2.102 → warn_scraper-1.2.103}/docs/_static/releasing-pypi.png +0 -0
- {warn_scraper-1.2.102 → warn_scraper-1.2.103}/docs/_static/releasing-release-published.png +0 -0
- {warn_scraper-1.2.102 → warn_scraper-1.2.103}/docs/_static/releasing-releases-button.png +0 -0
- {warn_scraper-1.2.102 → warn_scraper-1.2.103}/docs/_static/releasing-tag-button.png +0 -0
- {warn_scraper-1.2.102 → warn_scraper-1.2.103}/docs/_templates/sources.md.tmpl +0 -0
- {warn_scraper-1.2.102 → warn_scraper-1.2.103}/docs/conf.py +0 -0
- {warn_scraper-1.2.102 → warn_scraper-1.2.103}/docs/contributing.rst +0 -0
- {warn_scraper-1.2.102 → warn_scraper-1.2.103}/docs/index.rst +0 -0
- {warn_scraper-1.2.102 → warn_scraper-1.2.103}/docs/make.bat +0 -0
- {warn_scraper-1.2.102 → warn_scraper-1.2.103}/docs/reference.rst +0 -0
- {warn_scraper-1.2.102 → warn_scraper-1.2.103}/docs/releasing.md +0 -0
- {warn_scraper-1.2.102 → warn_scraper-1.2.103}/docs/requirements.txt +0 -0
- {warn_scraper-1.2.102 → warn_scraper-1.2.103}/docs/scrapers/al.md +0 -0
- {warn_scraper-1.2.102 → warn_scraper-1.2.103}/docs/scrapers/az.md +0 -0
- {warn_scraper-1.2.102 → warn_scraper-1.2.103}/docs/scrapers/ca.md +0 -0
- {warn_scraper-1.2.102 → warn_scraper-1.2.103}/docs/scrapers/co.md +0 -0
- {warn_scraper-1.2.102 → warn_scraper-1.2.103}/docs/scrapers/dc.md +0 -0
- {warn_scraper-1.2.102 → warn_scraper-1.2.103}/docs/scrapers/de.md +0 -0
- {warn_scraper-1.2.102 → warn_scraper-1.2.103}/docs/scrapers/ia.md +0 -0
- {warn_scraper-1.2.102 → warn_scraper-1.2.103}/docs/scrapers/in.md +0 -0
- {warn_scraper-1.2.102 → warn_scraper-1.2.103}/docs/scrapers/job_center.md +0 -0
- {warn_scraper-1.2.102 → warn_scraper-1.2.103}/docs/scrapers/ks.md +0 -0
- {warn_scraper-1.2.102 → warn_scraper-1.2.103}/docs/scrapers/md.md +0 -0
- {warn_scraper-1.2.102 → warn_scraper-1.2.103}/docs/scrapers/me.md +0 -0
- {warn_scraper-1.2.102 → warn_scraper-1.2.103}/docs/scrapers/mo.md +0 -0
- {warn_scraper-1.2.102 → warn_scraper-1.2.103}/docs/scrapers/ny.md +0 -0
- {warn_scraper-1.2.102 → warn_scraper-1.2.103}/docs/scrapers/ok.md +0 -0
- {warn_scraper-1.2.102 → warn_scraper-1.2.103}/docs/scrapers/or.md +0 -0
- {warn_scraper-1.2.102 → warn_scraper-1.2.103}/docs/scrapers/sc.md +0 -0
- {warn_scraper-1.2.102 → warn_scraper-1.2.103}/docs/scrapers/tx.md +0 -0
- {warn_scraper-1.2.102 → warn_scraper-1.2.103}/docs/scrapers/ut.md +0 -0
- {warn_scraper-1.2.102 → warn_scraper-1.2.103}/docs/scrapers/va.md +0 -0
- {warn_scraper-1.2.102 → warn_scraper-1.2.103}/docs/scrapers/vt.md +0 -0
- {warn_scraper-1.2.102 → warn_scraper-1.2.103}/docs/scrapers/wi.md +0 -0
- {warn_scraper-1.2.102 → warn_scraper-1.2.103}/docs/sources.md +0 -0
- {warn_scraper-1.2.102 → warn_scraper-1.2.103}/docs/usage.md +0 -0
- {warn_scraper-1.2.102 → warn_scraper-1.2.103}/setup.cfg +0 -0
- {warn_scraper-1.2.102 → warn_scraper-1.2.103}/setup.py +0 -0
- {warn_scraper-1.2.102 → warn_scraper-1.2.103}/tests/__init__.py +0 -0
- {warn_scraper-1.2.102 → warn_scraper-1.2.103}/tests/cassettes/test_cached_detail_pages.yaml +0 -0
- {warn_scraper-1.2.102 → warn_scraper-1.2.103}/tests/cassettes/test_cached_search_results.yaml +0 -0
- {warn_scraper-1.2.102 → warn_scraper-1.2.103}/tests/cassettes/test_missing_detail_page_values.yaml +0 -0
- {warn_scraper-1.2.102 → warn_scraper-1.2.103}/tests/cassettes/test_no_results.yaml +0 -0
- {warn_scraper-1.2.102 → warn_scraper-1.2.103}/tests/cassettes/test_paged_results.yaml +0 -0
- {warn_scraper-1.2.102 → warn_scraper-1.2.103}/tests/cassettes/test_scrape_integration.yaml +0 -0
- {warn_scraper-1.2.102 → warn_scraper-1.2.103}/tests/conftest.py +0 -0
- {warn_scraper-1.2.102 → warn_scraper-1.2.103}/tests/fixtures/2021_page_1.html +0 -0
- {warn_scraper-1.2.102 → warn_scraper-1.2.103}/tests/fixtures/2021_page_2.html +0 -0
- {warn_scraper-1.2.102 → warn_scraper-1.2.103}/tests/test_cache.py +0 -0
- {warn_scraper-1.2.102 → warn_scraper-1.2.103}/tests/test_delete.py +0 -0
- {warn_scraper-1.2.102 → warn_scraper-1.2.103}/tests/test_job_center.py +0 -0
- {warn_scraper-1.2.102 → warn_scraper-1.2.103}/tests/test_job_center_cache.py +0 -0
- {warn_scraper-1.2.102 → warn_scraper-1.2.103}/tests/test_openpyxl.py +0 -0
- {warn_scraper-1.2.102 → warn_scraper-1.2.103}/warn/__init__.py +0 -0
- {warn_scraper-1.2.102 → warn_scraper-1.2.103}/warn/cache.py +0 -0
- {warn_scraper-1.2.102 → warn_scraper-1.2.103}/warn/cli.py +0 -0
- {warn_scraper-1.2.102 → warn_scraper-1.2.103}/warn/platforms/__init__.py +0 -0
- {warn_scraper-1.2.102 → warn_scraper-1.2.103}/warn/platforms/job_center/__init__.py +0 -0
- {warn_scraper-1.2.102 → warn_scraper-1.2.103}/warn/platforms/job_center/cache.py +0 -0
- {warn_scraper-1.2.102 → warn_scraper-1.2.103}/warn/platforms/job_center/site.py +0 -0
- {warn_scraper-1.2.102 → warn_scraper-1.2.103}/warn/platforms/job_center/urls.py +0 -0
- {warn_scraper-1.2.102 → warn_scraper-1.2.103}/warn/platforms/job_center/utils.py +0 -0
- {warn_scraper-1.2.102 → warn_scraper-1.2.103}/warn/runner.py +0 -0
- {warn_scraper-1.2.102 → warn_scraper-1.2.103}/warn/scrapers/__init__.py +0 -0
- {warn_scraper-1.2.102 → warn_scraper-1.2.103}/warn/scrapers/ak.py +0 -0
- {warn_scraper-1.2.102 → warn_scraper-1.2.103}/warn/scrapers/al.py +0 -0
- {warn_scraper-1.2.102 → warn_scraper-1.2.103}/warn/scrapers/az.py +0 -0
- {warn_scraper-1.2.102 → warn_scraper-1.2.103}/warn/scrapers/ca.py +0 -0
- {warn_scraper-1.2.102 → warn_scraper-1.2.103}/warn/scrapers/co.py +0 -0
- {warn_scraper-1.2.102 → warn_scraper-1.2.103}/warn/scrapers/ct.py +0 -0
- {warn_scraper-1.2.102 → warn_scraper-1.2.103}/warn/scrapers/dc.py +0 -0
- {warn_scraper-1.2.102 → warn_scraper-1.2.103}/warn/scrapers/de.py +0 -0
- {warn_scraper-1.2.102 → warn_scraper-1.2.103}/warn/scrapers/fl.py +0 -0
- {warn_scraper-1.2.102 → warn_scraper-1.2.103}/warn/scrapers/ga.py +0 -0
- {warn_scraper-1.2.102 → warn_scraper-1.2.103}/warn/scrapers/hi.py +0 -0
- {warn_scraper-1.2.102 → warn_scraper-1.2.103}/warn/scrapers/ia.py +0 -0
- {warn_scraper-1.2.102 → warn_scraper-1.2.103}/warn/scrapers/id.py +0 -0
- {warn_scraper-1.2.102 → warn_scraper-1.2.103}/warn/scrapers/il.py +0 -0
- {warn_scraper-1.2.102 → warn_scraper-1.2.103}/warn/scrapers/in.py +0 -0
- {warn_scraper-1.2.102 → warn_scraper-1.2.103}/warn/scrapers/ks.py +0 -0
- {warn_scraper-1.2.102 → warn_scraper-1.2.103}/warn/scrapers/ky.py +0 -0
- {warn_scraper-1.2.102 → warn_scraper-1.2.103}/warn/scrapers/la.py +0 -0
- {warn_scraper-1.2.102 → warn_scraper-1.2.103}/warn/scrapers/md.py +0 -0
- {warn_scraper-1.2.102 → warn_scraper-1.2.103}/warn/scrapers/me.py +0 -0
- {warn_scraper-1.2.102 → warn_scraper-1.2.103}/warn/scrapers/mi.py +0 -0
- {warn_scraper-1.2.102 → warn_scraper-1.2.103}/warn/scrapers/mo.py +0 -0
- {warn_scraper-1.2.102 → warn_scraper-1.2.103}/warn/scrapers/mt.py +0 -0
- {warn_scraper-1.2.102 → warn_scraper-1.2.103}/warn/scrapers/ne.py +0 -0
- {warn_scraper-1.2.102 → warn_scraper-1.2.103}/warn/scrapers/nj.py +0 -0
- {warn_scraper-1.2.102 → warn_scraper-1.2.103}/warn/scrapers/nm.py +0 -0
- {warn_scraper-1.2.102 → warn_scraper-1.2.103}/warn/scrapers/oh.py +0 -0
- {warn_scraper-1.2.102 → warn_scraper-1.2.103}/warn/scrapers/ok.py +0 -0
- {warn_scraper-1.2.102 → warn_scraper-1.2.103}/warn/scrapers/or.py +0 -0
- {warn_scraper-1.2.102 → warn_scraper-1.2.103}/warn/scrapers/ri.py +0 -0
- {warn_scraper-1.2.102 → warn_scraper-1.2.103}/warn/scrapers/sc.py +0 -0
- {warn_scraper-1.2.102 → warn_scraper-1.2.103}/warn/scrapers/sd.py +0 -0
- {warn_scraper-1.2.102 → warn_scraper-1.2.103}/warn/scrapers/tn.py +0 -0
- {warn_scraper-1.2.102 → warn_scraper-1.2.103}/warn/scrapers/tx.py +0 -0
- {warn_scraper-1.2.102 → warn_scraper-1.2.103}/warn/scrapers/ut.py +0 -0
- {warn_scraper-1.2.102 → warn_scraper-1.2.103}/warn/scrapers/va.py +0 -0
- {warn_scraper-1.2.102 → warn_scraper-1.2.103}/warn/scrapers/vt.py +0 -0
- {warn_scraper-1.2.102 → warn_scraper-1.2.103}/warn/scrapers/wa.py +0 -0
- {warn_scraper-1.2.102 → warn_scraper-1.2.103}/warn/scrapers/wi.py +0 -0
- {warn_scraper-1.2.102 → warn_scraper-1.2.103}/warn/utils.py +0 -0
- {warn_scraper-1.2.102 → warn_scraper-1.2.103}/warn_scraper.egg-info/SOURCES.txt +0 -0
- {warn_scraper-1.2.102 → warn_scraper-1.2.103}/warn_scraper.egg-info/dependency_links.txt +0 -0
- {warn_scraper-1.2.102 → warn_scraper-1.2.103}/warn_scraper.egg-info/entry_points.txt +0 -0
- {warn_scraper-1.2.102 → warn_scraper-1.2.103}/warn_scraper.egg-info/not-zip-safe +0 -0
- {warn_scraper-1.2.102 → warn_scraper-1.2.103}/warn_scraper.egg-info/requires.txt +0 -0
- {warn_scraper-1.2.102 → warn_scraper-1.2.103}/warn_scraper.egg-info/top_level.txt +0 -0
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.4
|
2
2
|
Name: warn-scraper
|
3
|
-
Version: 1.2.102
|
3
|
+
Version: 1.2.103
|
4
4
|
Summary: Command-line interface for downloading WARN Act notices of qualified plant closings and mass layoffs from state government websites
|
5
5
|
Home-page: https://github.com/biglocalnews/warn-scraper
|
6
6
|
Author: Big Local News
|
@@ -0,0 +1,65 @@
|
|
1
|
+
import logging
|
2
|
+
from pathlib import Path
|
3
|
+
|
4
|
+
from .. import utils
|
5
|
+
from ..cache import Cache
|
6
|
+
|
7
|
+
# from bs4 import BeautifulSoup
|
8
|
+
# from openpyxl import load_workbook
|
9
|
+
|
10
|
+
|
11
|
+
__authors__ = ["zstumgoren", "Dilcia19", "ydoc5212", "palewire", "stucka"]
|
12
|
+
__tags__ = ["historical", "excel", "html"]
|
13
|
+
__source__ = {
|
14
|
+
"name": "New York Department of Labor",
|
15
|
+
"url": "https://dol.ny.gov/warn-notices",
|
16
|
+
}
|
17
|
+
|
18
|
+
logger = logging.getLogger(__name__)
|
19
|
+
|
20
|
+
|
21
|
+
def scrape(
|
22
|
+
data_dir: Path = utils.WARN_DATA_DIR,
|
23
|
+
cache_dir: Path = utils.WARN_CACHE_DIR,
|
24
|
+
) -> Path:
|
25
|
+
"""
|
26
|
+
Scrape data from New York.
|
27
|
+
|
28
|
+
Keyword arguments:
|
29
|
+
data_dir -- the Path where the result will be saved (default WARN_DATA_DIR)
|
30
|
+
cache_dir -- the Path where results can be cached (default WARN_CACHE_DIR)
|
31
|
+
|
32
|
+
Returns: the Path where the file is written
|
33
|
+
"""
|
34
|
+
cache = Cache(cache_dir)
|
35
|
+
|
36
|
+
"""
|
37
|
+
In 2025 New York shifted from a collection of Excel and HTML to something in Tableau. Tableau notes:
|
38
|
+
Find a new landing page for a data page, now done in Tableau: https://dol.ny.gov/warn-dashboard
|
39
|
+
Scroll down and there's a "View in Tableau Public" I don't remember clicking
|
40
|
+
Opens in new tab at https://public.tableau.com/app/profile/kylee.teague2482/viz/WorkerAdjustmentRetrainingNotificationWARN/WARN
|
41
|
+
Append .csv to the end of that URL:
|
42
|
+
https://public.tableau.com/app/profile/kylee.teague2482/viz/WorkerAdjustmentRetrainingNotificationWARN/WARN.csv
|
43
|
+
Try it in requests, no good. Try it in browser again. File downloads. Find it in the downloads section of the browser. Right-click, copy download link, try that in requests and ... it worked?
|
44
|
+
"""
|
45
|
+
|
46
|
+
url = "https://public.tableau.com/views/WorkerAdjustmentRetrainingNotificationWARN/WARN.csv?%3Adisplay_static_image=y&%3AbootstrapWhenNotified=true&%3Aembed=true&%3Alanguage=en-US&:embed=y&:showVizHome=n&:apiID=host0#navType=0&navSrc=Parse"
|
47
|
+
|
48
|
+
csv_file = "ny/tableau.csv"
|
49
|
+
|
50
|
+
cache.download(csv_file, url)
|
51
|
+
|
52
|
+
mydata = cache.read_csv(csv_file)
|
53
|
+
|
54
|
+
# Set the export path
|
55
|
+
data_path = data_dir / "ny.csv"
|
56
|
+
|
57
|
+
# Combine and write out the file
|
58
|
+
utils.write_rows_to_csv(data_path, mydata)
|
59
|
+
|
60
|
+
# Return the path to the file
|
61
|
+
return data_path
|
62
|
+
|
63
|
+
|
64
|
+
if __name__ == "__main__":
|
65
|
+
scrape()
|
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.4
|
2
2
|
Name: warn-scraper
|
3
|
-
Version: 1.2.102
|
3
|
+
Version: 1.2.103
|
4
4
|
Summary: Command-line interface for downloading WARN Act notices of qualified plant closings and mass layoffs from state government websites
|
5
5
|
Home-page: https://github.com/biglocalnews/warn-scraper
|
6
6
|
Author: Big Local News
|
@@ -1,130 +0,0 @@
|
|
1
|
-
import logging
|
2
|
-
from pathlib import Path
|
3
|
-
|
4
|
-
from bs4 import BeautifulSoup
|
5
|
-
from openpyxl import load_workbook
|
6
|
-
|
7
|
-
from .. import utils
|
8
|
-
from ..cache import Cache
|
9
|
-
|
10
|
-
__authors__ = ["zstumgoren", "Dilcia19", "ydoc5212", "palewire"]
|
11
|
-
__tags__ = ["historical", "excel"]
|
12
|
-
__source__ = {
|
13
|
-
"name": "New York Department of Labor",
|
14
|
-
"url": "https://dol.ny.gov/warn-notices",
|
15
|
-
}
|
16
|
-
|
17
|
-
logger = logging.getLogger(__name__)
|
18
|
-
|
19
|
-
|
20
|
-
def scrape(
|
21
|
-
data_dir: Path = utils.WARN_DATA_DIR,
|
22
|
-
cache_dir: Path = utils.WARN_CACHE_DIR,
|
23
|
-
) -> Path:
|
24
|
-
"""
|
25
|
-
Scrape data from New York.
|
26
|
-
|
27
|
-
Keyword arguments:
|
28
|
-
data_dir -- the Path where the result will be saved (default WARN_DATA_DIR)
|
29
|
-
cache_dir -- the Path where results can be cached (default WARN_CACHE_DIR)
|
30
|
-
|
31
|
-
Returns: the Path where the file is written
|
32
|
-
"""
|
33
|
-
cache = Cache(cache_dir)
|
34
|
-
|
35
|
-
# Get the latest HTML page
|
36
|
-
url_list = [
|
37
|
-
dict(year=2023, url="https://dol.ny.gov/warn-notices"),
|
38
|
-
dict(year=2022, url="https://dol.ny.gov/2022-warn-notices"),
|
39
|
-
dict(year=2021, url="https://dol.ny.gov/warn-notices-2021"),
|
40
|
-
]
|
41
|
-
|
42
|
-
# Loop through the urls and get the stuff
|
43
|
-
html_row_list = []
|
44
|
-
for config in url_list:
|
45
|
-
html_row_list += _get_html_data(cache, config)
|
46
|
-
|
47
|
-
# Get the historical static data file
|
48
|
-
excel_row_list = _get_historical_data(cache)
|
49
|
-
|
50
|
-
# Set the export path
|
51
|
-
data_path = data_dir / "ny.csv"
|
52
|
-
|
53
|
-
# Combine and write out the file
|
54
|
-
fieldnames = list(html_row_list[0].keys()) + list(excel_row_list[0].keys())
|
55
|
-
row_list = html_row_list + excel_row_list
|
56
|
-
utils.write_dict_rows_to_csv(
|
57
|
-
data_path,
|
58
|
-
fieldnames,
|
59
|
-
row_list,
|
60
|
-
extrasaction="ignore",
|
61
|
-
)
|
62
|
-
|
63
|
-
# Return the path to the file
|
64
|
-
return data_path
|
65
|
-
|
66
|
-
|
67
|
-
def _get_html_data(cache, config):
|
68
|
-
r = utils.get_url(config["url"])
|
69
|
-
html = r.text
|
70
|
-
|
71
|
-
# Save it to the cache
|
72
|
-
cache.write(f"ny/{config['year']}.html", html)
|
73
|
-
|
74
|
-
# Parse the HTML and grab our table
|
75
|
-
soup = BeautifulSoup(html, "html.parser")
|
76
|
-
table = soup.find("div", class_="landing-paragraphs").find("table")
|
77
|
-
|
78
|
-
row_list = []
|
79
|
-
# Loop through the rows of the table
|
80
|
-
for tr in table.find_all("tr")[1:]:
|
81
|
-
td_list = tr.find_all("td")
|
82
|
-
d = dict(
|
83
|
-
company_name=td_list[0].a.text,
|
84
|
-
notice_url=td_list[0].a["href"],
|
85
|
-
date_posted=td_list[1].text,
|
86
|
-
notice_dated=td_list[2].text,
|
87
|
-
)
|
88
|
-
row_list.append(d)
|
89
|
-
return row_list
|
90
|
-
|
91
|
-
|
92
|
-
def _get_historical_data(cache):
|
93
|
-
# Request the page and save it to the cache
|
94
|
-
url = (
|
95
|
-
"https://storage.googleapis.com/bln-data-public/warn-layoffs/ny_historical.xlsx"
|
96
|
-
)
|
97
|
-
|
98
|
-
excel_path = cache.download("ny/source.xlsx", url)
|
99
|
-
|
100
|
-
# Open it up
|
101
|
-
workbook = load_workbook(filename=excel_path)
|
102
|
-
|
103
|
-
# Get the first sheet
|
104
|
-
worksheet = workbook.worksheets[0]
|
105
|
-
|
106
|
-
# Convert the sheet to a list of lists
|
107
|
-
row_list = []
|
108
|
-
for r in worksheet.rows:
|
109
|
-
column = [cell.value for cell in r]
|
110
|
-
row_list.append(column)
|
111
|
-
|
112
|
-
# Transform this into a list of dictionaries with headers as keys
|
113
|
-
header_list = row_list.pop(0)
|
114
|
-
dict_list = []
|
115
|
-
for row in row_list:
|
116
|
-
d = {}
|
117
|
-
for i, cell in enumerate(row):
|
118
|
-
key = header_list[i]
|
119
|
-
# Skip any columns where the header is null
|
120
|
-
if key is None:
|
121
|
-
continue
|
122
|
-
d[key] = cell
|
123
|
-
dict_list.append(d)
|
124
|
-
|
125
|
-
# Return the list of dicts
|
126
|
-
return dict_list
|
127
|
-
|
128
|
-
|
129
|
-
if __name__ == "__main__":
|
130
|
-
scrape()
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
{warn_scraper-1.2.102 → warn_scraper-1.2.103}/tests/cassettes/test_cached_search_results.yaml
RENAMED
File without changes
|
{warn_scraper-1.2.102 → warn_scraper-1.2.103}/tests/cassettes/test_missing_detail_page_values.yaml
RENAMED
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|