warn-scraper 1.2.111__tar.gz → 1.2.112__tar.gz

This diff shows the changes between two publicly available versions of a package, as released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.
Files changed (136)
  1. {warn_scraper-1.2.111/warn_scraper.egg-info → warn_scraper-1.2.112}/PKG-INFO +1 -1
  2. {warn_scraper-1.2.111 → warn_scraper-1.2.112}/warn/scrapers/la.py +46 -4
  3. {warn_scraper-1.2.111 → warn_scraper-1.2.112/warn_scraper.egg-info}/PKG-INFO +1 -1
  4. {warn_scraper-1.2.111 → warn_scraper-1.2.112}/.devcontainer/devcontainer.json +0 -0
  5. {warn_scraper-1.2.111 → warn_scraper-1.2.112}/.github/dependabot.yml.disabled-for-sanity +0 -0
  6. {warn_scraper-1.2.111 → warn_scraper-1.2.112}/.github/workflows/continuous-deployment.yml +0 -0
  7. {warn_scraper-1.2.111 → warn_scraper-1.2.112}/.github/workflows/continuous-deployment.yml.broken-tests +0 -0
  8. {warn_scraper-1.2.111 → warn_scraper-1.2.112}/.gitignore +0 -0
  9. {warn_scraper-1.2.111 → warn_scraper-1.2.112}/.pre-commit-config.yaml +0 -0
  10. {warn_scraper-1.2.111 → warn_scraper-1.2.112}/LICENSE +0 -0
  11. {warn_scraper-1.2.111 → warn_scraper-1.2.112}/MANIFEST.in +0 -0
  12. {warn_scraper-1.2.111 → warn_scraper-1.2.112}/Makefile +0 -0
  13. {warn_scraper-1.2.111 → warn_scraper-1.2.112}/Pipfile +0 -0
  14. {warn_scraper-1.2.111 → warn_scraper-1.2.112}/Pipfile.lock +0 -0
  15. {warn_scraper-1.2.111 → warn_scraper-1.2.112}/README.md +0 -0
  16. {warn_scraper-1.2.111 → warn_scraper-1.2.112}/docs/Makefile +0 -0
  17. {warn_scraper-1.2.111 → warn_scraper-1.2.112}/docs/_static/R42693.pdf +0 -0
  18. {warn_scraper-1.2.111 → warn_scraper-1.2.112}/docs/_static/gao-03-1003.pdf +0 -0
  19. {warn_scraper-1.2.111 → warn_scraper-1.2.112}/docs/_static/releasing-actions-finished.png +0 -0
  20. {warn_scraper-1.2.111 → warn_scraper-1.2.112}/docs/_static/releasing-actions-start.png +0 -0
  21. {warn_scraper-1.2.111 → warn_scraper-1.2.112}/docs/_static/releasing-changelog-button.png +0 -0
  22. {warn_scraper-1.2.111 → warn_scraper-1.2.112}/docs/_static/releasing-changelog-entered.png +0 -0
  23. {warn_scraper-1.2.111 → warn_scraper-1.2.112}/docs/_static/releasing-draft-button.png +0 -0
  24. {warn_scraper-1.2.111 → warn_scraper-1.2.112}/docs/_static/releasing-name-release.png +0 -0
  25. {warn_scraper-1.2.111 → warn_scraper-1.2.112}/docs/_static/releasing-name-tag.png +0 -0
  26. {warn_scraper-1.2.111 → warn_scraper-1.2.112}/docs/_static/releasing-publish-button.png +0 -0
  27. {warn_scraper-1.2.111 → warn_scraper-1.2.112}/docs/_static/releasing-pypi.png +0 -0
  28. {warn_scraper-1.2.111 → warn_scraper-1.2.112}/docs/_static/releasing-release-published.png +0 -0
  29. {warn_scraper-1.2.111 → warn_scraper-1.2.112}/docs/_static/releasing-releases-button.png +0 -0
  30. {warn_scraper-1.2.111 → warn_scraper-1.2.112}/docs/_static/releasing-tag-button.png +0 -0
  31. {warn_scraper-1.2.111 → warn_scraper-1.2.112}/docs/_templates/sources.md.tmpl +0 -0
  32. {warn_scraper-1.2.111 → warn_scraper-1.2.112}/docs/conf.py +0 -0
  33. {warn_scraper-1.2.111 → warn_scraper-1.2.112}/docs/contributing.rst +0 -0
  34. {warn_scraper-1.2.111 → warn_scraper-1.2.112}/docs/index.rst +0 -0
  35. {warn_scraper-1.2.111 → warn_scraper-1.2.112}/docs/make.bat +0 -0
  36. {warn_scraper-1.2.111 → warn_scraper-1.2.112}/docs/reference.rst +0 -0
  37. {warn_scraper-1.2.111 → warn_scraper-1.2.112}/docs/releasing.md +0 -0
  38. {warn_scraper-1.2.111 → warn_scraper-1.2.112}/docs/requirements.txt +0 -0
  39. {warn_scraper-1.2.111 → warn_scraper-1.2.112}/docs/scrapers/al.md +0 -0
  40. {warn_scraper-1.2.111 → warn_scraper-1.2.112}/docs/scrapers/az.md +0 -0
  41. {warn_scraper-1.2.111 → warn_scraper-1.2.112}/docs/scrapers/ca.md +0 -0
  42. {warn_scraper-1.2.111 → warn_scraper-1.2.112}/docs/scrapers/co.md +0 -0
  43. {warn_scraper-1.2.111 → warn_scraper-1.2.112}/docs/scrapers/dc.md +0 -0
  44. {warn_scraper-1.2.111 → warn_scraper-1.2.112}/docs/scrapers/de.md +0 -0
  45. {warn_scraper-1.2.111 → warn_scraper-1.2.112}/docs/scrapers/ia.md +0 -0
  46. {warn_scraper-1.2.111 → warn_scraper-1.2.112}/docs/scrapers/in.md +0 -0
  47. {warn_scraper-1.2.111 → warn_scraper-1.2.112}/docs/scrapers/job_center.md +0 -0
  48. {warn_scraper-1.2.111 → warn_scraper-1.2.112}/docs/scrapers/ks.md +0 -0
  49. {warn_scraper-1.2.111 → warn_scraper-1.2.112}/docs/scrapers/md.md +0 -0
  50. {warn_scraper-1.2.111 → warn_scraper-1.2.112}/docs/scrapers/me.md +0 -0
  51. {warn_scraper-1.2.111 → warn_scraper-1.2.112}/docs/scrapers/mo.md +0 -0
  52. {warn_scraper-1.2.111 → warn_scraper-1.2.112}/docs/scrapers/ny.md +0 -0
  53. {warn_scraper-1.2.111 → warn_scraper-1.2.112}/docs/scrapers/ok.md +0 -0
  54. {warn_scraper-1.2.111 → warn_scraper-1.2.112}/docs/scrapers/or.md +0 -0
  55. {warn_scraper-1.2.111 → warn_scraper-1.2.112}/docs/scrapers/sc.md +0 -0
  56. {warn_scraper-1.2.111 → warn_scraper-1.2.112}/docs/scrapers/tx.md +0 -0
  57. {warn_scraper-1.2.111 → warn_scraper-1.2.112}/docs/scrapers/ut.md +0 -0
  58. {warn_scraper-1.2.111 → warn_scraper-1.2.112}/docs/scrapers/va.md +0 -0
  59. {warn_scraper-1.2.111 → warn_scraper-1.2.112}/docs/scrapers/vt.md +0 -0
  60. {warn_scraper-1.2.111 → warn_scraper-1.2.112}/docs/scrapers/wi.md +0 -0
  61. {warn_scraper-1.2.111 → warn_scraper-1.2.112}/docs/sources.md +0 -0
  62. {warn_scraper-1.2.111 → warn_scraper-1.2.112}/docs/usage.md +0 -0
  63. {warn_scraper-1.2.111 → warn_scraper-1.2.112}/setup.cfg +0 -0
  64. {warn_scraper-1.2.111 → warn_scraper-1.2.112}/setup.py +0 -0
  65. {warn_scraper-1.2.111 → warn_scraper-1.2.112}/tests/__init__.py +0 -0
  66. {warn_scraper-1.2.111 → warn_scraper-1.2.112}/tests/cassettes/test_cached_detail_pages.yaml +0 -0
  67. {warn_scraper-1.2.111 → warn_scraper-1.2.112}/tests/cassettes/test_cached_search_results.yaml +0 -0
  68. {warn_scraper-1.2.111 → warn_scraper-1.2.112}/tests/cassettes/test_missing_detail_page_values.yaml +0 -0
  69. {warn_scraper-1.2.111 → warn_scraper-1.2.112}/tests/cassettes/test_no_results.yaml +0 -0
  70. {warn_scraper-1.2.111 → warn_scraper-1.2.112}/tests/cassettes/test_paged_results.yaml +0 -0
  71. {warn_scraper-1.2.111 → warn_scraper-1.2.112}/tests/cassettes/test_scrape_integration.yaml +0 -0
  72. {warn_scraper-1.2.111 → warn_scraper-1.2.112}/tests/conftest.py +0 -0
  73. {warn_scraper-1.2.111 → warn_scraper-1.2.112}/tests/fixtures/2021_page_1.html +0 -0
  74. {warn_scraper-1.2.111 → warn_scraper-1.2.112}/tests/fixtures/2021_page_2.html +0 -0
  75. {warn_scraper-1.2.111 → warn_scraper-1.2.112}/tests/test_cache.py +0 -0
  76. {warn_scraper-1.2.111 → warn_scraper-1.2.112}/tests/test_delete.py +0 -0
  77. {warn_scraper-1.2.111 → warn_scraper-1.2.112}/tests/test_job_center.py +0 -0
  78. {warn_scraper-1.2.111 → warn_scraper-1.2.112}/tests/test_job_center_cache.py +0 -0
  79. {warn_scraper-1.2.111 → warn_scraper-1.2.112}/tests/test_openpyxl.py +0 -0
  80. {warn_scraper-1.2.111 → warn_scraper-1.2.112}/warn/__init__.py +0 -0
  81. {warn_scraper-1.2.111 → warn_scraper-1.2.112}/warn/cache.py +0 -0
  82. {warn_scraper-1.2.111 → warn_scraper-1.2.112}/warn/cli.py +0 -0
  83. {warn_scraper-1.2.111 → warn_scraper-1.2.112}/warn/platforms/__init__.py +0 -0
  84. {warn_scraper-1.2.111 → warn_scraper-1.2.112}/warn/platforms/job_center/__init__.py +0 -0
  85. {warn_scraper-1.2.111 → warn_scraper-1.2.112}/warn/platforms/job_center/cache.py +0 -0
  86. {warn_scraper-1.2.111 → warn_scraper-1.2.112}/warn/platforms/job_center/site.py +0 -0
  87. {warn_scraper-1.2.111 → warn_scraper-1.2.112}/warn/platforms/job_center/urls.py +0 -0
  88. {warn_scraper-1.2.111 → warn_scraper-1.2.112}/warn/platforms/job_center/utils.py +0 -0
  89. {warn_scraper-1.2.111 → warn_scraper-1.2.112}/warn/runner.py +0 -0
  90. {warn_scraper-1.2.111 → warn_scraper-1.2.112}/warn/scrapers/__init__.py +0 -0
  91. {warn_scraper-1.2.111 → warn_scraper-1.2.112}/warn/scrapers/ak.py +0 -0
  92. {warn_scraper-1.2.111 → warn_scraper-1.2.112}/warn/scrapers/al.py +0 -0
  93. {warn_scraper-1.2.111 → warn_scraper-1.2.112}/warn/scrapers/az.py +0 -0
  94. {warn_scraper-1.2.111 → warn_scraper-1.2.112}/warn/scrapers/ca.py +0 -0
  95. {warn_scraper-1.2.111 → warn_scraper-1.2.112}/warn/scrapers/co.py +0 -0
  96. {warn_scraper-1.2.111 → warn_scraper-1.2.112}/warn/scrapers/ct.py +0 -0
  97. {warn_scraper-1.2.111 → warn_scraper-1.2.112}/warn/scrapers/dc.py +0 -0
  98. {warn_scraper-1.2.111 → warn_scraper-1.2.112}/warn/scrapers/de.py +0 -0
  99. {warn_scraper-1.2.111 → warn_scraper-1.2.112}/warn/scrapers/fl.py +0 -0
  100. {warn_scraper-1.2.111 → warn_scraper-1.2.112}/warn/scrapers/ga.py +0 -0
  101. {warn_scraper-1.2.111 → warn_scraper-1.2.112}/warn/scrapers/hi.py +0 -0
  102. {warn_scraper-1.2.111 → warn_scraper-1.2.112}/warn/scrapers/ia.py +0 -0
  103. {warn_scraper-1.2.111 → warn_scraper-1.2.112}/warn/scrapers/id.py +0 -0
  104. {warn_scraper-1.2.111 → warn_scraper-1.2.112}/warn/scrapers/il.py +0 -0
  105. {warn_scraper-1.2.111 → warn_scraper-1.2.112}/warn/scrapers/in.py +0 -0
  106. {warn_scraper-1.2.111 → warn_scraper-1.2.112}/warn/scrapers/ks.py +0 -0
  107. {warn_scraper-1.2.111 → warn_scraper-1.2.112}/warn/scrapers/ky.py +0 -0
  108. {warn_scraper-1.2.111 → warn_scraper-1.2.112}/warn/scrapers/md.py +0 -0
  109. {warn_scraper-1.2.111 → warn_scraper-1.2.112}/warn/scrapers/me.py +0 -0
  110. {warn_scraper-1.2.111 → warn_scraper-1.2.112}/warn/scrapers/mi.py +0 -0
  111. {warn_scraper-1.2.111 → warn_scraper-1.2.112}/warn/scrapers/mo.py +0 -0
  112. {warn_scraper-1.2.111 → warn_scraper-1.2.112}/warn/scrapers/mt.py +0 -0
  113. {warn_scraper-1.2.111 → warn_scraper-1.2.112}/warn/scrapers/ne.py +0 -0
  114. {warn_scraper-1.2.111 → warn_scraper-1.2.112}/warn/scrapers/nj.py +0 -0
  115. {warn_scraper-1.2.111 → warn_scraper-1.2.112}/warn/scrapers/nm.py +0 -0
  116. {warn_scraper-1.2.111 → warn_scraper-1.2.112}/warn/scrapers/ny.py +0 -0
  117. {warn_scraper-1.2.111 → warn_scraper-1.2.112}/warn/scrapers/oh.py +0 -0
  118. {warn_scraper-1.2.111 → warn_scraper-1.2.112}/warn/scrapers/ok.py +0 -0
  119. {warn_scraper-1.2.111 → warn_scraper-1.2.112}/warn/scrapers/or.py +0 -0
  120. {warn_scraper-1.2.111 → warn_scraper-1.2.112}/warn/scrapers/ri.py +0 -0
  121. {warn_scraper-1.2.111 → warn_scraper-1.2.112}/warn/scrapers/sc.py +0 -0
  122. {warn_scraper-1.2.111 → warn_scraper-1.2.112}/warn/scrapers/sd.py +0 -0
  123. {warn_scraper-1.2.111 → warn_scraper-1.2.112}/warn/scrapers/tn.py +0 -0
  124. {warn_scraper-1.2.111 → warn_scraper-1.2.112}/warn/scrapers/tx.py +0 -0
  125. {warn_scraper-1.2.111 → warn_scraper-1.2.112}/warn/scrapers/ut.py +0 -0
  126. {warn_scraper-1.2.111 → warn_scraper-1.2.112}/warn/scrapers/va.py +0 -0
  127. {warn_scraper-1.2.111 → warn_scraper-1.2.112}/warn/scrapers/vt.py +0 -0
  128. {warn_scraper-1.2.111 → warn_scraper-1.2.112}/warn/scrapers/wa.py +0 -0
  129. {warn_scraper-1.2.111 → warn_scraper-1.2.112}/warn/scrapers/wi.py +0 -0
  130. {warn_scraper-1.2.111 → warn_scraper-1.2.112}/warn/utils.py +0 -0
  131. {warn_scraper-1.2.111 → warn_scraper-1.2.112}/warn_scraper.egg-info/SOURCES.txt +0 -0
  132. {warn_scraper-1.2.111 → warn_scraper-1.2.112}/warn_scraper.egg-info/dependency_links.txt +0 -0
  133. {warn_scraper-1.2.111 → warn_scraper-1.2.112}/warn_scraper.egg-info/entry_points.txt +0 -0
  134. {warn_scraper-1.2.111 → warn_scraper-1.2.112}/warn_scraper.egg-info/not-zip-safe +0 -0
  135. {warn_scraper-1.2.111 → warn_scraper-1.2.112}/warn_scraper.egg-info/requires.txt +0 -0
  136. {warn_scraper-1.2.111 → warn_scraper-1.2.112}/warn_scraper.egg-info/top_level.txt +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: warn-scraper
3
- Version: 1.2.111
3
+ Version: 1.2.112
4
4
  Summary: Command-line interface for downloading WARN Act notices of qualified plant closings and mass layoffs from state government websites
5
5
  Home-page: https://github.com/biglocalnews/warn-scraper
6
6
  Author: Big Local News
@@ -1,16 +1,19 @@
1
1
  import logging
2
2
  import os
3
3
  import re
4
+ import sys
5
+ from base64 import b64decode
4
6
  from datetime import datetime
5
7
  from pathlib import Path
6
8
 
7
9
  import pdfplumber
10
+ import requests
8
11
  from bs4 import BeautifulSoup
9
12
 
10
13
  from .. import utils
11
14
  from ..cache import Cache
12
15
 
13
- __authors__ = ["chriszs"]
16
+ __authors__ = ["chriszs", "stucka"]
14
17
  __tags__ = ["html", "pdf"]
15
18
  __source__ = {
16
19
  "name": "Louisiana Workforce Commission",
@@ -33,6 +36,14 @@ def scrape(
33
36
 
34
37
  Returns: the Path where the file is written
35
38
  """
39
+ try:
40
+ zyte_api_key = os.environ["ZYTE_API_KEY"]
41
+ except KeyError:
42
+ logger.error(
43
+ "No ZYTE_API_KEY variable found in environment. Please get an API key from Zyte and export it."
44
+ )
45
+ sys.exit(1)
46
+
36
47
  # Fire up the cache
37
48
  cache = Cache(cache_dir)
38
49
 
@@ -43,10 +54,22 @@ def scrape(
43
54
 
44
55
  # Download the root page
45
56
  url = f"{base_url}Downloads/{file_base}.asp"
46
- html = utils.get_url(url).text
57
+ api_response = requests.post(
58
+ "https://api.zyte.com/v1/extract",
59
+ auth=(zyte_api_key, ""),
60
+ json={
61
+ "url": url,
62
+ "httpResponseBody": True,
63
+ "followRedirect": True,
64
+ },
65
+ )
66
+ html_bytes: bytes = b64decode(api_response.json()["httpResponseBody"])
67
+ # html = utils.get_url(url).text
68
+ html = html_bytes.decode("utf-8", errors="backslashreplace")
47
69
 
48
70
  # Save it to the cache
49
- cache_key = f"{state_code}/{file_base}.html"
71
+ cache_key = cache_dir / f"{state_code}/{file_base}.html"
72
+ utils.create_directory(Path(cache_key), is_file=True)
50
73
  cache.write(cache_key, html)
51
74
 
52
75
  # Parse out the links to WARN notice PDFs
@@ -59,9 +82,28 @@ def scrape(
59
82
  if "WARN Notices" in link.text:
60
83
  # Download the PDF
61
84
  pdf_url = f"{base_url}{link['href']}"
62
- pdf_path = _read_or_download(cache, state_code, pdf_url)
85
+ logger.debug(pdf_url)
86
+ api_response = requests.post(
87
+ "https://api.zyte.com/v1/extract",
88
+ auth=(zyte_api_key, ""),
89
+ json={
90
+ "url": pdf_url,
91
+ "httpResponseBody": True,
92
+ "followRedirect": True,
93
+ },
94
+ )
95
+ http_response_body: bytes = b64decode(
96
+ api_response.json()["httpResponseBody"]
97
+ )
98
+ pdf_path = cache_dir / f"{state_code}/{os.path.basename(pdf_url)}"
99
+
100
+ with open(pdf_path, "wb") as fp:
101
+ fp.write(http_response_body)
102
+
103
+ # pdf_path = _read_or_download(cache, state_code, pdf_url)
63
104
 
64
105
  # Process the PDF
106
+ logger.debug(f"Attempting to parse {pdf_path}")
65
107
  rows = _process_pdf(pdf_path)
66
108
  all_rows.extend(rows)
67
109
 
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: warn-scraper
3
- Version: 1.2.111
3
+ Version: 1.2.112
4
4
  Summary: Command-line interface for downloading WARN Act notices of qualified plant closings and mass layoffs from state government websites
5
5
  Home-page: https://github.com/biglocalnews/warn-scraper
6
6
  Author: Big Local News
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes