warn-scraper 1.2.113__tar.gz → 1.2.115__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {warn_scraper-1.2.113 → warn_scraper-1.2.115}/.pre-commit-config.yaml +1 -1
- {warn_scraper-1.2.113/warn_scraper.egg-info → warn_scraper-1.2.115}/PKG-INFO +1 -1
- {warn_scraper-1.2.113 → warn_scraper-1.2.115}/Pipfile +1 -0
- {warn_scraper-1.2.113 → warn_scraper-1.2.115}/Pipfile.lock +10 -1
- {warn_scraper-1.2.113 → warn_scraper-1.2.115}/warn/platforms/job_center/site.py +1 -1
- warn_scraper-1.2.115/warn/scrapers/ok.py +118 -0
- {warn_scraper-1.2.113 → warn_scraper-1.2.115}/warn/utils.py +2 -2
- {warn_scraper-1.2.113 → warn_scraper-1.2.115/warn_scraper.egg-info}/PKG-INFO +1 -1
- warn_scraper-1.2.113/warn/scrapers/ok.py +0 -42
- {warn_scraper-1.2.113 → warn_scraper-1.2.115}/.devcontainer/devcontainer.json +0 -0
- {warn_scraper-1.2.113 → warn_scraper-1.2.115}/.github/dependabot.yml.disabled-for-sanity +0 -0
- {warn_scraper-1.2.113 → warn_scraper-1.2.115}/.github/workflows/continuous-deployment.yml +0 -0
- {warn_scraper-1.2.113 → warn_scraper-1.2.115}/.github/workflows/continuous-deployment.yml.broken-tests +0 -0
- {warn_scraper-1.2.113 → warn_scraper-1.2.115}/.gitignore +0 -0
- {warn_scraper-1.2.113 → warn_scraper-1.2.115}/LICENSE +0 -0
- {warn_scraper-1.2.113 → warn_scraper-1.2.115}/MANIFEST.in +0 -0
- {warn_scraper-1.2.113 → warn_scraper-1.2.115}/Makefile +0 -0
- {warn_scraper-1.2.113 → warn_scraper-1.2.115}/README.md +0 -0
- {warn_scraper-1.2.113 → warn_scraper-1.2.115}/docs/Makefile +0 -0
- {warn_scraper-1.2.113 → warn_scraper-1.2.115}/docs/_static/R42693.pdf +0 -0
- {warn_scraper-1.2.113 → warn_scraper-1.2.115}/docs/_static/gao-03-1003.pdf +0 -0
- {warn_scraper-1.2.113 → warn_scraper-1.2.115}/docs/_static/releasing-actions-finished.png +0 -0
- {warn_scraper-1.2.113 → warn_scraper-1.2.115}/docs/_static/releasing-actions-start.png +0 -0
- {warn_scraper-1.2.113 → warn_scraper-1.2.115}/docs/_static/releasing-changelog-button.png +0 -0
- {warn_scraper-1.2.113 → warn_scraper-1.2.115}/docs/_static/releasing-changelog-entered.png +0 -0
- {warn_scraper-1.2.113 → warn_scraper-1.2.115}/docs/_static/releasing-draft-button.png +0 -0
- {warn_scraper-1.2.113 → warn_scraper-1.2.115}/docs/_static/releasing-name-release.png +0 -0
- {warn_scraper-1.2.113 → warn_scraper-1.2.115}/docs/_static/releasing-name-tag.png +0 -0
- {warn_scraper-1.2.113 → warn_scraper-1.2.115}/docs/_static/releasing-publish-button.png +0 -0
- {warn_scraper-1.2.113 → warn_scraper-1.2.115}/docs/_static/releasing-pypi.png +0 -0
- {warn_scraper-1.2.113 → warn_scraper-1.2.115}/docs/_static/releasing-release-published.png +0 -0
- {warn_scraper-1.2.113 → warn_scraper-1.2.115}/docs/_static/releasing-releases-button.png +0 -0
- {warn_scraper-1.2.113 → warn_scraper-1.2.115}/docs/_static/releasing-tag-button.png +0 -0
- {warn_scraper-1.2.113 → warn_scraper-1.2.115}/docs/_templates/sources.md.tmpl +0 -0
- {warn_scraper-1.2.113 → warn_scraper-1.2.115}/docs/conf.py +0 -0
- {warn_scraper-1.2.113 → warn_scraper-1.2.115}/docs/contributing.rst +0 -0
- {warn_scraper-1.2.113 → warn_scraper-1.2.115}/docs/index.rst +0 -0
- {warn_scraper-1.2.113 → warn_scraper-1.2.115}/docs/make.bat +0 -0
- {warn_scraper-1.2.113 → warn_scraper-1.2.115}/docs/reference.rst +0 -0
- {warn_scraper-1.2.113 → warn_scraper-1.2.115}/docs/releasing.md +0 -0
- {warn_scraper-1.2.113 → warn_scraper-1.2.115}/docs/requirements.txt +0 -0
- {warn_scraper-1.2.113 → warn_scraper-1.2.115}/docs/scrapers/al.md +0 -0
- {warn_scraper-1.2.113 → warn_scraper-1.2.115}/docs/scrapers/az.md +0 -0
- {warn_scraper-1.2.113 → warn_scraper-1.2.115}/docs/scrapers/ca.md +0 -0
- {warn_scraper-1.2.113 → warn_scraper-1.2.115}/docs/scrapers/co.md +0 -0
- {warn_scraper-1.2.113 → warn_scraper-1.2.115}/docs/scrapers/dc.md +0 -0
- {warn_scraper-1.2.113 → warn_scraper-1.2.115}/docs/scrapers/de.md +0 -0
- {warn_scraper-1.2.113 → warn_scraper-1.2.115}/docs/scrapers/ia.md +0 -0
- {warn_scraper-1.2.113 → warn_scraper-1.2.115}/docs/scrapers/in.md +0 -0
- {warn_scraper-1.2.113 → warn_scraper-1.2.115}/docs/scrapers/job_center.md +0 -0
- {warn_scraper-1.2.113 → warn_scraper-1.2.115}/docs/scrapers/ks.md +0 -0
- {warn_scraper-1.2.113 → warn_scraper-1.2.115}/docs/scrapers/md.md +0 -0
- {warn_scraper-1.2.113 → warn_scraper-1.2.115}/docs/scrapers/me.md +0 -0
- {warn_scraper-1.2.113 → warn_scraper-1.2.115}/docs/scrapers/mo.md +0 -0
- {warn_scraper-1.2.113 → warn_scraper-1.2.115}/docs/scrapers/ny.md +0 -0
- {warn_scraper-1.2.113 → warn_scraper-1.2.115}/docs/scrapers/ok.md +0 -0
- {warn_scraper-1.2.113 → warn_scraper-1.2.115}/docs/scrapers/or.md +0 -0
- {warn_scraper-1.2.113 → warn_scraper-1.2.115}/docs/scrapers/sc.md +0 -0
- {warn_scraper-1.2.113 → warn_scraper-1.2.115}/docs/scrapers/tx.md +0 -0
- {warn_scraper-1.2.113 → warn_scraper-1.2.115}/docs/scrapers/ut.md +0 -0
- {warn_scraper-1.2.113 → warn_scraper-1.2.115}/docs/scrapers/va.md +0 -0
- {warn_scraper-1.2.113 → warn_scraper-1.2.115}/docs/scrapers/vt.md +0 -0
- {warn_scraper-1.2.113 → warn_scraper-1.2.115}/docs/scrapers/wi.md +0 -0
- {warn_scraper-1.2.113 → warn_scraper-1.2.115}/docs/sources.md +0 -0
- {warn_scraper-1.2.113 → warn_scraper-1.2.115}/docs/usage.md +0 -0
- {warn_scraper-1.2.113 → warn_scraper-1.2.115}/setup.cfg +0 -0
- {warn_scraper-1.2.113 → warn_scraper-1.2.115}/setup.py +0 -0
- {warn_scraper-1.2.113 → warn_scraper-1.2.115}/tests/__init__.py +0 -0
- {warn_scraper-1.2.113 → warn_scraper-1.2.115}/tests/cassettes/test_cached_detail_pages.yaml +0 -0
- {warn_scraper-1.2.113 → warn_scraper-1.2.115}/tests/cassettes/test_cached_search_results.yaml +0 -0
- {warn_scraper-1.2.113 → warn_scraper-1.2.115}/tests/cassettes/test_missing_detail_page_values.yaml +0 -0
- {warn_scraper-1.2.113 → warn_scraper-1.2.115}/tests/cassettes/test_no_results.yaml +0 -0
- {warn_scraper-1.2.113 → warn_scraper-1.2.115}/tests/cassettes/test_paged_results.yaml +0 -0
- {warn_scraper-1.2.113 → warn_scraper-1.2.115}/tests/cassettes/test_scrape_integration.yaml +0 -0
- {warn_scraper-1.2.113 → warn_scraper-1.2.115}/tests/conftest.py +0 -0
- {warn_scraper-1.2.113 → warn_scraper-1.2.115}/tests/fixtures/2021_page_1.html +0 -0
- {warn_scraper-1.2.113 → warn_scraper-1.2.115}/tests/fixtures/2021_page_2.html +0 -0
- {warn_scraper-1.2.113 → warn_scraper-1.2.115}/tests/test_cache.py +0 -0
- {warn_scraper-1.2.113 → warn_scraper-1.2.115}/tests/test_delete.py +0 -0
- {warn_scraper-1.2.113 → warn_scraper-1.2.115}/tests/test_job_center.py +0 -0
- {warn_scraper-1.2.113 → warn_scraper-1.2.115}/tests/test_job_center_cache.py +0 -0
- {warn_scraper-1.2.113 → warn_scraper-1.2.115}/tests/test_openpyxl.py +0 -0
- {warn_scraper-1.2.113 → warn_scraper-1.2.115}/warn/__init__.py +0 -0
- {warn_scraper-1.2.113 → warn_scraper-1.2.115}/warn/cache.py +0 -0
- {warn_scraper-1.2.113 → warn_scraper-1.2.115}/warn/cli.py +0 -0
- {warn_scraper-1.2.113 → warn_scraper-1.2.115}/warn/platforms/__init__.py +0 -0
- {warn_scraper-1.2.113 → warn_scraper-1.2.115}/warn/platforms/job_center/__init__.py +0 -0
- {warn_scraper-1.2.113 → warn_scraper-1.2.115}/warn/platforms/job_center/cache.py +0 -0
- {warn_scraper-1.2.113 → warn_scraper-1.2.115}/warn/platforms/job_center/urls.py +0 -0
- {warn_scraper-1.2.113 → warn_scraper-1.2.115}/warn/platforms/job_center/utils.py +0 -0
- {warn_scraper-1.2.113 → warn_scraper-1.2.115}/warn/runner.py +0 -0
- {warn_scraper-1.2.113 → warn_scraper-1.2.115}/warn/scrapers/__init__.py +0 -0
- {warn_scraper-1.2.113 → warn_scraper-1.2.115}/warn/scrapers/ak.py +0 -0
- {warn_scraper-1.2.113 → warn_scraper-1.2.115}/warn/scrapers/al.py +0 -0
- {warn_scraper-1.2.113 → warn_scraper-1.2.115}/warn/scrapers/az.py +0 -0
- {warn_scraper-1.2.113 → warn_scraper-1.2.115}/warn/scrapers/ca.py +0 -0
- {warn_scraper-1.2.113 → warn_scraper-1.2.115}/warn/scrapers/co.py +0 -0
- {warn_scraper-1.2.113 → warn_scraper-1.2.115}/warn/scrapers/ct.py +0 -0
- {warn_scraper-1.2.113 → warn_scraper-1.2.115}/warn/scrapers/dc.py +0 -0
- {warn_scraper-1.2.113 → warn_scraper-1.2.115}/warn/scrapers/de.py +0 -0
- {warn_scraper-1.2.113 → warn_scraper-1.2.115}/warn/scrapers/fl.py +0 -0
- {warn_scraper-1.2.113 → warn_scraper-1.2.115}/warn/scrapers/ga.py +0 -0
- {warn_scraper-1.2.113 → warn_scraper-1.2.115}/warn/scrapers/hi.py +0 -0
- {warn_scraper-1.2.113 → warn_scraper-1.2.115}/warn/scrapers/ia.py +0 -0
- {warn_scraper-1.2.113 → warn_scraper-1.2.115}/warn/scrapers/id.py +0 -0
- {warn_scraper-1.2.113 → warn_scraper-1.2.115}/warn/scrapers/il.py +0 -0
- {warn_scraper-1.2.113 → warn_scraper-1.2.115}/warn/scrapers/in.py +0 -0
- {warn_scraper-1.2.113 → warn_scraper-1.2.115}/warn/scrapers/ks.py +0 -0
- {warn_scraper-1.2.113 → warn_scraper-1.2.115}/warn/scrapers/ky.py +0 -0
- {warn_scraper-1.2.113 → warn_scraper-1.2.115}/warn/scrapers/la.py +0 -0
- {warn_scraper-1.2.113 → warn_scraper-1.2.115}/warn/scrapers/md.py +0 -0
- {warn_scraper-1.2.113 → warn_scraper-1.2.115}/warn/scrapers/me.py +0 -0
- {warn_scraper-1.2.113 → warn_scraper-1.2.115}/warn/scrapers/mi.py +0 -0
- {warn_scraper-1.2.113 → warn_scraper-1.2.115}/warn/scrapers/mo.py +0 -0
- {warn_scraper-1.2.113 → warn_scraper-1.2.115}/warn/scrapers/mt.py +0 -0
- {warn_scraper-1.2.113 → warn_scraper-1.2.115}/warn/scrapers/ne.py +0 -0
- {warn_scraper-1.2.113 → warn_scraper-1.2.115}/warn/scrapers/nj.py +0 -0
- {warn_scraper-1.2.113 → warn_scraper-1.2.115}/warn/scrapers/nm.py +0 -0
- {warn_scraper-1.2.113 → warn_scraper-1.2.115}/warn/scrapers/ny.py +0 -0
- {warn_scraper-1.2.113 → warn_scraper-1.2.115}/warn/scrapers/oh.py +0 -0
- {warn_scraper-1.2.113 → warn_scraper-1.2.115}/warn/scrapers/or.py +0 -0
- {warn_scraper-1.2.113 → warn_scraper-1.2.115}/warn/scrapers/ri.py +0 -0
- {warn_scraper-1.2.113 → warn_scraper-1.2.115}/warn/scrapers/sc.py +0 -0
- {warn_scraper-1.2.113 → warn_scraper-1.2.115}/warn/scrapers/sd.py +0 -0
- {warn_scraper-1.2.113 → warn_scraper-1.2.115}/warn/scrapers/tn.py +0 -0
- {warn_scraper-1.2.113 → warn_scraper-1.2.115}/warn/scrapers/tx.py +0 -0
- {warn_scraper-1.2.113 → warn_scraper-1.2.115}/warn/scrapers/ut.py +0 -0
- {warn_scraper-1.2.113 → warn_scraper-1.2.115}/warn/scrapers/va.py +0 -0
- {warn_scraper-1.2.113 → warn_scraper-1.2.115}/warn/scrapers/vt.py +0 -0
- {warn_scraper-1.2.113 → warn_scraper-1.2.115}/warn/scrapers/wa.py +0 -0
- {warn_scraper-1.2.113 → warn_scraper-1.2.115}/warn/scrapers/wi.py +0 -0
- {warn_scraper-1.2.113 → warn_scraper-1.2.115}/warn_scraper.egg-info/SOURCES.txt +0 -0
- {warn_scraper-1.2.113 → warn_scraper-1.2.115}/warn_scraper.egg-info/dependency_links.txt +0 -0
- {warn_scraper-1.2.113 → warn_scraper-1.2.115}/warn_scraper.egg-info/entry_points.txt +0 -0
- {warn_scraper-1.2.113 → warn_scraper-1.2.115}/warn_scraper.egg-info/not-zip-safe +0 -0
- {warn_scraper-1.2.113 → warn_scraper-1.2.115}/warn_scraper.egg-info/requires.txt +0 -0
- {warn_scraper-1.2.113 → warn_scraper-1.2.115}/warn_scraper.egg-info/top_level.txt +0 -0
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.4
|
2
2
|
Name: warn-scraper
|
3
|
-
Version: 1.2.113
|
3
|
+
Version: 1.2.115
|
4
4
|
Summary: Command-line interface for downloading WARN Act notices of qualified plant closings and mass layoffs from state government websites
|
5
5
|
Home-page: https://github.com/biglocalnews/warn-scraper
|
6
6
|
Author: Big Local News
|
@@ -1,7 +1,7 @@
|
|
1
1
|
{
|
2
2
|
"_meta": {
|
3
3
|
"hash": {
|
4
|
-
"sha256": "
|
4
|
+
"sha256": "b1cc4814822457ec04972c1a07991d1f983c946e8cdcfc315cbe1b6d9d84b2cd"
|
5
5
|
},
|
6
6
|
"pipfile-spec": 6,
|
7
7
|
"requires": {
|
@@ -1067,6 +1067,15 @@
|
|
1067
1067
|
"markers": "python_version >= '3.8'",
|
1068
1068
|
"version": "==2.1.0"
|
1069
1069
|
},
|
1070
|
+
"isort": {
|
1071
|
+
"hashes": [
|
1072
|
+
"sha256:1cb5df28dfbc742e490c5e41bad6da41b805b0a8be7bc93cd0fb2a8a890ac450",
|
1073
|
+
"sha256:2dc5d7f65c9678d94c88dfc29161a320eec67328bc97aad576874cb4be1e9615"
|
1074
|
+
],
|
1075
|
+
"index": "pypi",
|
1076
|
+
"markers": "python_full_version >= '3.9.0'",
|
1077
|
+
"version": "==6.0.1"
|
1078
|
+
},
|
1070
1079
|
"jaraco.classes": {
|
1071
1080
|
"hashes": [
|
1072
1081
|
"sha256:47a024b51d0239c0dd8c8540c6c7f484be3b8fcf0b2d85c13825780d3b3f3acd",
|
@@ -112,7 +112,7 @@ class Site:
|
|
112
112
|
logger.debug("Fetching from cache")
|
113
113
|
return self.cache.fetch(url, params)
|
114
114
|
else:
|
115
|
-
logger.debug("Pulling from the web")
|
115
|
+
logger.debug(f"Pulling from the web: {url} with params {params}")
|
116
116
|
response = requests.get(url, params=params, verify=self.verify)
|
117
117
|
logger.debug(f"Response code: {response.status_code}")
|
118
118
|
html = response.text
|
@@ -0,0 +1,118 @@
|
|
1
|
+
import logging
|
2
|
+
from pathlib import Path
|
3
|
+
|
4
|
+
import requests
|
5
|
+
|
6
|
+
from .. import utils
|
7
|
+
|
8
|
+
__authors__ = ["zstumgoren", "Dilcia19", "stucka"]
|
9
|
+
__tags__ = [""]
|
10
|
+
__source__ = {
|
11
|
+
"name": "Oklahoma Office of Workforces Development",
|
12
|
+
"url": "https://www.employoklahoma.gov/Participants/s/warnnotices",
|
13
|
+
}
|
14
|
+
|
15
|
+
logger = logging.getLogger(__name__)
|
16
|
+
|
17
|
+
|
18
|
+
def scrape(
|
19
|
+
data_dir: Path = utils.WARN_DATA_DIR,
|
20
|
+
cache_dir: Path = utils.WARN_CACHE_DIR,
|
21
|
+
use_cache: bool = True,
|
22
|
+
) -> Path:
|
23
|
+
"""
|
24
|
+
Scrape data from Oklahoma.
|
25
|
+
|
26
|
+
Keyword arguments:
|
27
|
+
data_dir -- the Path where the result will be saved (default WARN_DATA_DIR)
|
28
|
+
cache_dir -- the Path where results can be cached (default WARN_CACHE_DIR)
|
29
|
+
use_cache -- a Boolean indicating whether the cache should be used (default True)
|
30
|
+
|
31
|
+
Returns: the Path where the file is written
|
32
|
+
"""
|
33
|
+
output_csv = data_dir / "ok.csv"
|
34
|
+
# search_url = "https://okjobmatch.com/search/warn_lookups"
|
35
|
+
# search_url = "https://www.employoklahoma.gov/Participants/s/warnnotices"
|
36
|
+
posturl = "https://www.employoklahoma.gov/Participants/s/sfsites/aura?r=2&aura.ApexAction.execute=6"
|
37
|
+
|
38
|
+
# There are a bunch of hard-coded values in here that seem to work for at least a day.
|
39
|
+
# Undetermined:
|
40
|
+
# -- Will this continue working in the short- or medium-term?
|
41
|
+
# -- What is the significance of each variable?
|
42
|
+
# -- How do we refresh these?
|
43
|
+
|
44
|
+
headers = {
|
45
|
+
"User-Agent": "Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:142.0) Gecko/20100101 Firefox/142.0",
|
46
|
+
"Accept": "*/*",
|
47
|
+
"Accept-Language": "en-US,en;q=0.5",
|
48
|
+
"Accept-Encoding": "gzip, deflate, br, zstd",
|
49
|
+
"Referer": "https://www.employoklahoma.gov/Participants/s/warnnotices",
|
50
|
+
"X-SFDC-LDS-Endpoints": "ApexActionController.execute:ConfigurableLoginAndMaintenanceMessages.hasDocument, ApexActionController.execute:ConfigurableLoginAndMaintenanceMessages.checkJobExpiry, ApexActionController.execute:ConfigurableLoginAndMaintenanceMessages.checkResumeExpiry, ApexActionController.execute:ConfigurableLoginAndMaintenanceMessages.checkUIRegistered, ApexActionController.execute:ConfigurableLoginAndMaintenanceMessages.getLoginMaintenanceMessage, ApexActionController.execute:OESC_JS_getWARNLayoffNotices.getListofLayoffAccService",
|
51
|
+
"X-SFDC-Page-Scope-Id": "9c659a19-8020-41b0-a81c-36335e22801a",
|
52
|
+
"X-SFDC-Request-Id": "16140000007a08bd2f",
|
53
|
+
"X-SFDC-Page-Cache": "9439898463d86806",
|
54
|
+
"Content-Type": "application/x-www-form-urlencoded;charset=UTF-8",
|
55
|
+
"X-B3-TraceId": "856a2236ba7d283e",
|
56
|
+
"X-B3-SpanId": "b79b2da3a7dc4544",
|
57
|
+
"X-B3-Sampled": "0",
|
58
|
+
"Origin": "https://www.employoklahoma.gov",
|
59
|
+
"Connection": "keep-alive",
|
60
|
+
"Cookie": "renderCtx=%7B%22pageId%22%3A%223823bba2-3b00-4db7-aca6-5ca0eb67fc63%22%2C%22schema%22%3A%22Published%22%2C%22viewType%22%3A%22Published%22%2C%22brandingSetId%22%3A%22fa0b6362-0214-44b9-947d-2543eaab22c7%22%2C%22audienceIds%22%3A%22%22%7D; CookieConsentPolicy=0:1; LSKey-c$CookieConsentPolicy=0:1; pctrk=f3070d0c-7078-4062-96bb-de9e82cbb1db",
|
61
|
+
"Sec-Fetch-Dest": "empty",
|
62
|
+
"Sec-Fetch-Mode": "cors",
|
63
|
+
"Sec-Fetch-Site": "same-origin",
|
64
|
+
}
|
65
|
+
|
66
|
+
payload = "message=%7B%22actions%22%3A%5B%7B%22id%22%3A%22156%3Ba%22%2C%22descriptor%22%3A%22aura%3A%2F%2FApexActionController%2FACTION%24execute%22%2C%22callingDescriptor%22%3A%22UNKNOWN%22%2C%22params%22%3A%7B%22namespace%22%3A%22%22%2C%22classname%22%3A%22ConfigurableLoginAndMaintenanceMessages%22%2C%22method%22%3A%22hasDocument%22%2C%22params%22%3A%7B%7D%2C%22cacheable%22%3Afalse%2C%22isContinuation%22%3Afalse%7D%7D%2C%7B%22id%22%3A%22157%3Ba%22%2C%22descriptor%22%3A%22aura%3A%2F%2FApexActionController%2FACTION%24execute%22%2C%22callingDescriptor%22%3A%22UNKNOWN%22%2C%22params%22%3A%7B%22namespace%22%3A%22%22%2C%22classname%22%3A%22ConfigurableLoginAndMaintenanceMessages%22%2C%22method%22%3A%22checkJobExpiry%22%2C%22params%22%3A%7B%7D%2C%22cacheable%22%3Afalse%2C%22isContinuation%22%3Afalse%7D%7D%2C%7B%22id%22%3A%22158%3Ba%22%2C%22descriptor%22%3A%22aura%3A%2F%2FApexActionController%2FACTION%24execute%22%2C%22callingDescriptor%22%3A%22UNKNOWN%22%2C%22params%22%3A%7B%22namespace%22%3A%22%22%2C%22classname%22%3A%22ConfigurableLoginAndMaintenanceMessages%22%2C%22method%22%3A%22checkResumeExpiry%22%2C%22params%22%3A%7B%7D%2C%22cacheable%22%3Afalse%2C%22isContinuation%22%3Afalse%7D%7D%2C%7B%22id%22%3A%22159%3Ba%22%2C%22descriptor%22%3A%22aura%3A%2F%2FApexActionController%2FACTION%24execute%22%2C%22callingDescriptor%22%3A%22UNKNOWN%22%2C%22params%22%3A%7B%22namespace%22%3A%22%22%2C%22classname%22%3A%22ConfigurableLoginAndMaintenanceMessages%22%2C%22method%22%3A%22checkUIRegistered%22%2C%22params%22%3A%7B%7D%2C%22cacheable%22%3Afalse%2C%22isContinuation%22%3Afalse%7D%7D%2C%7B%22id%22%3A%22160%3Ba%22%2C%22descriptor%22%3A%22aura%3A%2F%2FApexActionController%2FACTION%24execute%22%2C%22callingDescriptor%22%3A%22UNKNOWN%22%2C%22params%22%3A%7B%22namespace%22%3A%22%22%2C%22classname%22%3A%22ConfigurableLoginAndMaintenanceMessages%22%2C%22method%22%3A%22getLoginMaintenanceMessage%22%2C%22params%22%3A%7B%22displayTo%22%3A%22Job%20Seekers%22%2C%22messageType%22%3A%22Portal%20L
ogin%20Messages%22%7D%2C%22cacheable%22%3Afalse%2C%22isContinuation%22%3Afalse%7D%7D%2C%7B%22id%22%3A%22161%3Ba%22%2C%22descriptor%22%3A%22aura%3A%2F%2FApexActionController%2FACTION%24execute%22%2C%22callingDescriptor%22%3A%22UNKNOWN%22%2C%22params%22%3A%7B%22namespace%22%3A%22%22%2C%22classname%22%3A%22OESC_JS_getWARNLayoffNotices%22%2C%22method%22%3A%22getListofLayoffAccService%22%2C%22cacheable%22%3Afalse%2C%22isContinuation%22%3Afalse%7D%7D%5D%7D&aura.context=%7B%22mode%22%3A%22PROD%22%2C%22fwuid%22%3A%22eE5UbjZPdVlRT3M0d0xtOXc5MzVOQWg5TGxiTHU3MEQ5RnBMM0VzVXc1cmcxMi42MjkxNDU2LjE2Nzc3MjE2%22%2C%22app%22%3A%22siteforce%3AcommunityApp%22%2C%22loaded%22%3A%7B%22APPLICATION%40markup%3A%2F%2Fsiteforce%3AcommunityApp%22%3A%221305_7pTC6grCTP7M16KdvDQ-Xw%22%7D%2C%22dn%22%3A%5B%5D%2C%22globals%22%3A%7B%7D%2C%22uad%22%3Atrue%7D&aura.pageURI=%2FParticipants%2Fs%2Fwarnnotices&aura.token=null"
|
67
|
+
|
68
|
+
logger.debug(f"Attempting to send hard-coded data to {posturl}")
|
69
|
+
r = requests.post(posturl, headers=headers, data=payload)
|
70
|
+
rawdata = r.json()
|
71
|
+
|
72
|
+
for entry in rawdata["actions"]:
|
73
|
+
if (
|
74
|
+
entry["id"] == "161;a"
|
75
|
+
): # What is this value? Will this change? Also no idea.
|
76
|
+
cleanerdata = entry["returnValue"]["returnValue"]
|
77
|
+
"""
|
78
|
+
fields = set()
|
79
|
+
for entry in cleanerdata:
|
80
|
+
for field in entry:
|
81
|
+
fields.add(field)
|
82
|
+
{'Id',
|
83
|
+
'Launchpad__Layoff_Closure_Type__c',
|
84
|
+
'Launchpad__Notice_Date__c',
|
85
|
+
'OESC_Employer_City__c',
|
86
|
+
'OESC_Employer_Name__c',
|
87
|
+
'OESC_Employer_Zip_Code__c',
|
88
|
+
'RecordTypeId',
|
89
|
+
'Select_Local_Workforce_Board__c'}
|
90
|
+
"""
|
91
|
+
fields = {
|
92
|
+
"Id": "id",
|
93
|
+
"Launchpad__Layoff_Closure_Type__c": "closure_type",
|
94
|
+
"Launchpad__Notice_Date__c": "notice_date",
|
95
|
+
"OESC_Employer_City__c": "city",
|
96
|
+
"OESC_Employer_Name__c": "company_name",
|
97
|
+
"OESC_Employer_Zip_Code__c": "zip_code",
|
98
|
+
"RecordTypeId": "record_type_id",
|
99
|
+
"Select_Local_Workforce_Board__c": "workforce_board",
|
100
|
+
"jobs-not-in-this": "jobs",
|
101
|
+
}
|
102
|
+
|
103
|
+
masterlist = []
|
104
|
+
for entry in cleanerdata:
|
105
|
+
line = {}
|
106
|
+
for item in fields:
|
107
|
+
if item in entry:
|
108
|
+
line[fields[item]] = entry[item]
|
109
|
+
else:
|
110
|
+
line[fields[item]] = None
|
111
|
+
masterlist.append(line)
|
112
|
+
|
113
|
+
utils.write_dict_rows_to_csv(output_csv, list(fields.values()), masterlist)
|
114
|
+
return output_csv
|
115
|
+
|
116
|
+
|
117
|
+
if __name__ == "__main__":
|
118
|
+
scrape()
|
@@ -209,12 +209,12 @@ def write_rows_to_csv(output_path: Path, rows: list, mode="w"):
|
|
209
209
|
|
210
210
|
|
211
211
|
def write_dict_rows_to_csv(output_path, headers, rows, mode="w", extrasaction="raise"):
|
212
|
-
"""Write the provided
|
212
|
+
"""Write the provided list of dictionaries to the provided path as comma-separated values.
|
213
213
|
|
214
214
|
Args:
|
215
215
|
output_path (Path): the Path where the result will be saved
|
216
216
|
headers (list): a list of the headers for the output file
|
217
|
-
rows (list): the
|
217
|
+
rows (list): the list of dictionaries to be saved
|
218
218
|
mode (str): the mode to be used when opening the file (default 'w')
|
219
219
|
extrasaction (str): what to do if a field isn't in the headers (default 'raise')
|
220
220
|
"""
|
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.4
|
2
2
|
Name: warn-scraper
|
3
|
-
Version: 1.2.113
|
3
|
+
Version: 1.2.115
|
4
4
|
Summary: Command-line interface for downloading WARN Act notices of qualified plant closings and mass layoffs from state government websites
|
5
5
|
Home-page: https://github.com/biglocalnews/warn-scraper
|
6
6
|
Author: Big Local News
|
@@ -1,42 +0,0 @@
|
|
1
|
-
from pathlib import Path
|
2
|
-
|
3
|
-
from warn.platforms.job_center.utils import scrape_state
|
4
|
-
|
5
|
-
from .. import utils
|
6
|
-
|
7
|
-
__authors__ = ["zstumgoren", "Dilcia19"]
|
8
|
-
__tags__ = ["jobcenter"]
|
9
|
-
__source__ = {
|
10
|
-
"name": "Oklahoma Office of Workforces Development",
|
11
|
-
"url": "https://okjobmatch.com/search/warn_lookups/new",
|
12
|
-
}
|
13
|
-
|
14
|
-
|
15
|
-
def scrape(
|
16
|
-
data_dir: Path = utils.WARN_DATA_DIR,
|
17
|
-
cache_dir: Path = utils.WARN_CACHE_DIR,
|
18
|
-
use_cache: bool = True,
|
19
|
-
) -> Path:
|
20
|
-
"""
|
21
|
-
Scrape data from Oklahoma.
|
22
|
-
|
23
|
-
Keyword arguments:
|
24
|
-
data_dir -- the Path were the result will be saved (default WARN_DATA_DIR)
|
25
|
-
cache_dir -- the Path where results can be cached (default WARN_CACHE_DIR)
|
26
|
-
use_cache -- a Boolean indicating whether the cache should be used (default True)
|
27
|
-
|
28
|
-
Returns: the Path where the file is written
|
29
|
-
"""
|
30
|
-
output_csv = data_dir / "ok.csv"
|
31
|
-
search_url = "https://okjobmatch.com/search/warn_lookups"
|
32
|
-
# Date chosen based on manual research
|
33
|
-
stop_year = 1999
|
34
|
-
# Use cache for years before current and prior year
|
35
|
-
scrape_state(
|
36
|
-
"OK", search_url, output_csv, stop_year, cache_dir, use_cache=use_cache
|
37
|
-
)
|
38
|
-
return output_csv
|
39
|
-
|
40
|
-
|
41
|
-
if __name__ == "__main__":
|
42
|
-
scrape()
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
{warn_scraper-1.2.113 → warn_scraper-1.2.115}/tests/cassettes/test_cached_search_results.yaml
RENAMED
File without changes
|
{warn_scraper-1.2.113 → warn_scraper-1.2.115}/tests/cassettes/test_missing_detail_page_values.yaml
RENAMED
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|