warn-scraper 1.2.73__tar.gz → 1.2.75__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (135) hide show
  1. {warn-scraper-1.2.73/warn_scraper.egg-info → warn-scraper-1.2.75}/PKG-INFO +1 -1
  2. {warn-scraper-1.2.73 → warn-scraper-1.2.75}/warn/scrapers/ca.py +19 -14
  3. {warn-scraper-1.2.73 → warn-scraper-1.2.75}/warn/scrapers/id.py +6 -8
  4. {warn-scraper-1.2.73 → warn-scraper-1.2.75/warn_scraper.egg-info}/PKG-INFO +1 -1
  5. {warn-scraper-1.2.73 → warn-scraper-1.2.75}/.devcontainer/devcontainer.json +0 -0
  6. {warn-scraper-1.2.73 → warn-scraper-1.2.75}/.github/dependabot.yml.disabled +0 -0
  7. {warn-scraper-1.2.73 → warn-scraper-1.2.75}/.github/workflows/continuous-deployment.yml +0 -0
  8. {warn-scraper-1.2.73 → warn-scraper-1.2.75}/.gitignore +0 -0
  9. {warn-scraper-1.2.73 → warn-scraper-1.2.75}/.pre-commit-config.yaml +0 -0
  10. {warn-scraper-1.2.73 → warn-scraper-1.2.75}/LICENSE +0 -0
  11. {warn-scraper-1.2.73 → warn-scraper-1.2.75}/MANIFEST.in +0 -0
  12. {warn-scraper-1.2.73 → warn-scraper-1.2.75}/Makefile +0 -0
  13. {warn-scraper-1.2.73 → warn-scraper-1.2.75}/Pipfile +0 -0
  14. {warn-scraper-1.2.73 → warn-scraper-1.2.75}/Pipfile.lock +0 -0
  15. {warn-scraper-1.2.73 → warn-scraper-1.2.75}/README.md +0 -0
  16. {warn-scraper-1.2.73 → warn-scraper-1.2.75}/docs/Makefile +0 -0
  17. {warn-scraper-1.2.73 → warn-scraper-1.2.75}/docs/_static/R42693.pdf +0 -0
  18. {warn-scraper-1.2.73 → warn-scraper-1.2.75}/docs/_static/gao-03-1003.pdf +0 -0
  19. {warn-scraper-1.2.73 → warn-scraper-1.2.75}/docs/_static/releasing-actions-finished.png +0 -0
  20. {warn-scraper-1.2.73 → warn-scraper-1.2.75}/docs/_static/releasing-actions-start.png +0 -0
  21. {warn-scraper-1.2.73 → warn-scraper-1.2.75}/docs/_static/releasing-changelog-button.png +0 -0
  22. {warn-scraper-1.2.73 → warn-scraper-1.2.75}/docs/_static/releasing-changelog-entered.png +0 -0
  23. {warn-scraper-1.2.73 → warn-scraper-1.2.75}/docs/_static/releasing-draft-button.png +0 -0
  24. {warn-scraper-1.2.73 → warn-scraper-1.2.75}/docs/_static/releasing-name-release.png +0 -0
  25. {warn-scraper-1.2.73 → warn-scraper-1.2.75}/docs/_static/releasing-name-tag.png +0 -0
  26. {warn-scraper-1.2.73 → warn-scraper-1.2.75}/docs/_static/releasing-publish-button.png +0 -0
  27. {warn-scraper-1.2.73 → warn-scraper-1.2.75}/docs/_static/releasing-pypi.png +0 -0
  28. {warn-scraper-1.2.73 → warn-scraper-1.2.75}/docs/_static/releasing-release-published.png +0 -0
  29. {warn-scraper-1.2.73 → warn-scraper-1.2.75}/docs/_static/releasing-releases-button.png +0 -0
  30. {warn-scraper-1.2.73 → warn-scraper-1.2.75}/docs/_static/releasing-tag-button.png +0 -0
  31. {warn-scraper-1.2.73 → warn-scraper-1.2.75}/docs/_templates/sources.md.tmpl +0 -0
  32. {warn-scraper-1.2.73 → warn-scraper-1.2.75}/docs/conf.py +0 -0
  33. {warn-scraper-1.2.73 → warn-scraper-1.2.75}/docs/contributing.rst +0 -0
  34. {warn-scraper-1.2.73 → warn-scraper-1.2.75}/docs/index.rst +0 -0
  35. {warn-scraper-1.2.73 → warn-scraper-1.2.75}/docs/make.bat +0 -0
  36. {warn-scraper-1.2.73 → warn-scraper-1.2.75}/docs/reference.rst +0 -0
  37. {warn-scraper-1.2.73 → warn-scraper-1.2.75}/docs/releasing.md +0 -0
  38. {warn-scraper-1.2.73 → warn-scraper-1.2.75}/docs/requirements.txt +0 -0
  39. {warn-scraper-1.2.73 → warn-scraper-1.2.75}/docs/scrapers/al.md +0 -0
  40. {warn-scraper-1.2.73 → warn-scraper-1.2.75}/docs/scrapers/az.md +0 -0
  41. {warn-scraper-1.2.73 → warn-scraper-1.2.75}/docs/scrapers/ca.md +0 -0
  42. {warn-scraper-1.2.73 → warn-scraper-1.2.75}/docs/scrapers/co.md +0 -0
  43. {warn-scraper-1.2.73 → warn-scraper-1.2.75}/docs/scrapers/dc.md +0 -0
  44. {warn-scraper-1.2.73 → warn-scraper-1.2.75}/docs/scrapers/de.md +0 -0
  45. {warn-scraper-1.2.73 → warn-scraper-1.2.75}/docs/scrapers/ia.md +0 -0
  46. {warn-scraper-1.2.73 → warn-scraper-1.2.75}/docs/scrapers/in.md +0 -0
  47. {warn-scraper-1.2.73 → warn-scraper-1.2.75}/docs/scrapers/job_center.md +0 -0
  48. {warn-scraper-1.2.73 → warn-scraper-1.2.75}/docs/scrapers/ks.md +0 -0
  49. {warn-scraper-1.2.73 → warn-scraper-1.2.75}/docs/scrapers/md.md +0 -0
  50. {warn-scraper-1.2.73 → warn-scraper-1.2.75}/docs/scrapers/me.md +0 -0
  51. {warn-scraper-1.2.73 → warn-scraper-1.2.75}/docs/scrapers/mo.md +0 -0
  52. {warn-scraper-1.2.73 → warn-scraper-1.2.75}/docs/scrapers/ny.md +0 -0
  53. {warn-scraper-1.2.73 → warn-scraper-1.2.75}/docs/scrapers/ok.md +0 -0
  54. {warn-scraper-1.2.73 → warn-scraper-1.2.75}/docs/scrapers/or.md +0 -0
  55. {warn-scraper-1.2.73 → warn-scraper-1.2.75}/docs/scrapers/sc.md +0 -0
  56. {warn-scraper-1.2.73 → warn-scraper-1.2.75}/docs/scrapers/tx.md +0 -0
  57. {warn-scraper-1.2.73 → warn-scraper-1.2.75}/docs/scrapers/ut.md +0 -0
  58. {warn-scraper-1.2.73 → warn-scraper-1.2.75}/docs/scrapers/va.md +0 -0
  59. {warn-scraper-1.2.73 → warn-scraper-1.2.75}/docs/scrapers/vt.md +0 -0
  60. {warn-scraper-1.2.73 → warn-scraper-1.2.75}/docs/scrapers/wi.md +0 -0
  61. {warn-scraper-1.2.73 → warn-scraper-1.2.75}/docs/sources.md +0 -0
  62. {warn-scraper-1.2.73 → warn-scraper-1.2.75}/docs/usage.md +0 -0
  63. {warn-scraper-1.2.73 → warn-scraper-1.2.75}/setup.cfg +0 -0
  64. {warn-scraper-1.2.73 → warn-scraper-1.2.75}/setup.py +0 -0
  65. {warn-scraper-1.2.73 → warn-scraper-1.2.75}/tests/__init__.py +0 -0
  66. {warn-scraper-1.2.73 → warn-scraper-1.2.75}/tests/cassettes/test_cached_detail_pages.yaml +0 -0
  67. {warn-scraper-1.2.73 → warn-scraper-1.2.75}/tests/cassettes/test_cached_search_results.yaml +0 -0
  68. {warn-scraper-1.2.73 → warn-scraper-1.2.75}/tests/cassettes/test_missing_detail_page_values.yaml +0 -0
  69. {warn-scraper-1.2.73 → warn-scraper-1.2.75}/tests/cassettes/test_no_results.yaml +0 -0
  70. {warn-scraper-1.2.73 → warn-scraper-1.2.75}/tests/cassettes/test_paged_results.yaml +0 -0
  71. {warn-scraper-1.2.73 → warn-scraper-1.2.75}/tests/cassettes/test_scrape_integration.yaml +0 -0
  72. {warn-scraper-1.2.73 → warn-scraper-1.2.75}/tests/conftest.py +0 -0
  73. {warn-scraper-1.2.73 → warn-scraper-1.2.75}/tests/fixtures/2021_page_1.html +0 -0
  74. {warn-scraper-1.2.73 → warn-scraper-1.2.75}/tests/fixtures/2021_page_2.html +0 -0
  75. {warn-scraper-1.2.73 → warn-scraper-1.2.75}/tests/test_cache.py +0 -0
  76. {warn-scraper-1.2.73 → warn-scraper-1.2.75}/tests/test_delete.py +0 -0
  77. {warn-scraper-1.2.73 → warn-scraper-1.2.75}/tests/test_job_center.py +0 -0
  78. {warn-scraper-1.2.73 → warn-scraper-1.2.75}/tests/test_job_center_cache.py +0 -0
  79. {warn-scraper-1.2.73 → warn-scraper-1.2.75}/tests/test_openpyxl.py +0 -0
  80. {warn-scraper-1.2.73 → warn-scraper-1.2.75}/warn/__init__.py +0 -0
  81. {warn-scraper-1.2.73 → warn-scraper-1.2.75}/warn/cache.py +0 -0
  82. {warn-scraper-1.2.73 → warn-scraper-1.2.75}/warn/cli.py +0 -0
  83. {warn-scraper-1.2.73 → warn-scraper-1.2.75}/warn/platforms/__init__.py +0 -0
  84. {warn-scraper-1.2.73 → warn-scraper-1.2.75}/warn/platforms/job_center/__init__.py +0 -0
  85. {warn-scraper-1.2.73 → warn-scraper-1.2.75}/warn/platforms/job_center/cache.py +0 -0
  86. {warn-scraper-1.2.73 → warn-scraper-1.2.75}/warn/platforms/job_center/site.py +0 -0
  87. {warn-scraper-1.2.73 → warn-scraper-1.2.75}/warn/platforms/job_center/urls.py +0 -0
  88. {warn-scraper-1.2.73 → warn-scraper-1.2.75}/warn/platforms/job_center/utils.py +0 -0
  89. {warn-scraper-1.2.73 → warn-scraper-1.2.75}/warn/runner.py +0 -0
  90. {warn-scraper-1.2.73 → warn-scraper-1.2.75}/warn/scrapers/__init__.py +0 -0
  91. {warn-scraper-1.2.73 → warn-scraper-1.2.75}/warn/scrapers/ak.py +0 -0
  92. {warn-scraper-1.2.73 → warn-scraper-1.2.75}/warn/scrapers/al.py +0 -0
  93. {warn-scraper-1.2.73 → warn-scraper-1.2.75}/warn/scrapers/az.py +0 -0
  94. {warn-scraper-1.2.73 → warn-scraper-1.2.75}/warn/scrapers/co.py +0 -0
  95. {warn-scraper-1.2.73 → warn-scraper-1.2.75}/warn/scrapers/ct.py +0 -0
  96. {warn-scraper-1.2.73 → warn-scraper-1.2.75}/warn/scrapers/dc.py +0 -0
  97. {warn-scraper-1.2.73 → warn-scraper-1.2.75}/warn/scrapers/de.py +0 -0
  98. {warn-scraper-1.2.73 → warn-scraper-1.2.75}/warn/scrapers/fl.py +0 -0
  99. {warn-scraper-1.2.73 → warn-scraper-1.2.75}/warn/scrapers/ga.py +0 -0
  100. {warn-scraper-1.2.73 → warn-scraper-1.2.75}/warn/scrapers/hi.py +0 -0
  101. {warn-scraper-1.2.73 → warn-scraper-1.2.75}/warn/scrapers/ia.py +0 -0
  102. {warn-scraper-1.2.73 → warn-scraper-1.2.75}/warn/scrapers/il.py +0 -0
  103. {warn-scraper-1.2.73 → warn-scraper-1.2.75}/warn/scrapers/in.py +0 -0
  104. {warn-scraper-1.2.73 → warn-scraper-1.2.75}/warn/scrapers/ks.py +0 -0
  105. {warn-scraper-1.2.73 → warn-scraper-1.2.75}/warn/scrapers/ky.py +0 -0
  106. {warn-scraper-1.2.73 → warn-scraper-1.2.75}/warn/scrapers/la.py +0 -0
  107. {warn-scraper-1.2.73 → warn-scraper-1.2.75}/warn/scrapers/md.py +0 -0
  108. {warn-scraper-1.2.73 → warn-scraper-1.2.75}/warn/scrapers/me.py +0 -0
  109. {warn-scraper-1.2.73 → warn-scraper-1.2.75}/warn/scrapers/mi.py +0 -0
  110. {warn-scraper-1.2.73 → warn-scraper-1.2.75}/warn/scrapers/mo.py +0 -0
  111. {warn-scraper-1.2.73 → warn-scraper-1.2.75}/warn/scrapers/mt.py +0 -0
  112. {warn-scraper-1.2.73 → warn-scraper-1.2.75}/warn/scrapers/ne.py +0 -0
  113. {warn-scraper-1.2.73 → warn-scraper-1.2.75}/warn/scrapers/nj.py +0 -0
  114. {warn-scraper-1.2.73 → warn-scraper-1.2.75}/warn/scrapers/nm.py +0 -0
  115. {warn-scraper-1.2.73 → warn-scraper-1.2.75}/warn/scrapers/ny.py +0 -0
  116. {warn-scraper-1.2.73 → warn-scraper-1.2.75}/warn/scrapers/oh.py +0 -0
  117. {warn-scraper-1.2.73 → warn-scraper-1.2.75}/warn/scrapers/ok.py +0 -0
  118. {warn-scraper-1.2.73 → warn-scraper-1.2.75}/warn/scrapers/or.py +0 -0
  119. {warn-scraper-1.2.73 → warn-scraper-1.2.75}/warn/scrapers/ri.py +0 -0
  120. {warn-scraper-1.2.73 → warn-scraper-1.2.75}/warn/scrapers/sc.py +0 -0
  121. {warn-scraper-1.2.73 → warn-scraper-1.2.75}/warn/scrapers/sd.py +0 -0
  122. {warn-scraper-1.2.73 → warn-scraper-1.2.75}/warn/scrapers/tn.py +0 -0
  123. {warn-scraper-1.2.73 → warn-scraper-1.2.75}/warn/scrapers/tx.py +0 -0
  124. {warn-scraper-1.2.73 → warn-scraper-1.2.75}/warn/scrapers/ut.py +0 -0
  125. {warn-scraper-1.2.73 → warn-scraper-1.2.75}/warn/scrapers/va.py +0 -0
  126. {warn-scraper-1.2.73 → warn-scraper-1.2.75}/warn/scrapers/vt.py +0 -0
  127. {warn-scraper-1.2.73 → warn-scraper-1.2.75}/warn/scrapers/wa.py +0 -0
  128. {warn-scraper-1.2.73 → warn-scraper-1.2.75}/warn/scrapers/wi.py +0 -0
  129. {warn-scraper-1.2.73 → warn-scraper-1.2.75}/warn/utils.py +0 -0
  130. {warn-scraper-1.2.73 → warn-scraper-1.2.75}/warn_scraper.egg-info/SOURCES.txt +0 -0
  131. {warn-scraper-1.2.73 → warn-scraper-1.2.75}/warn_scraper.egg-info/dependency_links.txt +0 -0
  132. {warn-scraper-1.2.73 → warn-scraper-1.2.75}/warn_scraper.egg-info/entry_points.txt +0 -0
  133. {warn-scraper-1.2.73 → warn-scraper-1.2.75}/warn_scraper.egg-info/not-zip-safe +0 -0
  134. {warn-scraper-1.2.73 → warn-scraper-1.2.75}/warn_scraper.egg-info/requires.txt +0 -0
  135. {warn-scraper-1.2.73 → warn-scraper-1.2.75}/warn_scraper.egg-info/top_level.txt +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: warn-scraper
3
- Version: 1.2.73
3
+ Version: 1.2.75
4
4
  Summary: Command-line interface for downloading WARN Act notices of qualified plant closings and mass layoffs from state government websites
5
5
  Home-page: https://github.com/biglocalnews/warn-scraper
6
6
  Author: Big Local News
@@ -224,20 +224,25 @@ def _extract_pdf_data(pdf_path):
224
224
  if "summary" in first_cell:
225
225
  continue
226
226
  for row in rows:
227
- data_row = {}
228
- for i, value in enumerate(row):
229
- this_raw_header = raw_header[i]
230
- this_clean_header = header_crosswalk[this_raw_header]
231
- data_row[this_clean_header] = value
232
- # Data clean-ups
233
- data_row.update(
234
- {
235
- "effective_date": data_row["effective_date"].replace(" ", ""),
236
- "received_date": data_row["received_date"].replace(" ", ""),
237
- "source_file": str(pdf_path).split("/")[-1],
238
- }
239
- )
240
- data.append(data_row)
227
+ # Summary rows have an extra field, and the above code does not
228
+ # block the summary table from being parsed if it jumps onto another page.
229
+ if len(row) != len(raw_header) + 1:
230
+ data_row = {}
231
+ for i, value in enumerate(row):
232
+ this_raw_header = raw_header[i]
233
+ this_clean_header = header_crosswalk[this_raw_header]
234
+ data_row[this_clean_header] = value
235
+ # Data clean-ups
236
+ data_row.update(
237
+ {
238
+ "effective_date": data_row["effective_date"].replace(
239
+ " ", ""
240
+ ),
241
+ "received_date": data_row["received_date"].replace(" ", ""),
242
+ "source_file": str(pdf_path).split("/")[-1],
243
+ }
244
+ )
245
+ data.append(data_row)
241
246
  return data
242
247
 
243
248
 
@@ -12,7 +12,7 @@ __authors__ = ["chriszs", "stucka"]
12
12
  __tags__ = ["pdf"]
13
13
  __source__ = {
14
14
  "name": "Idaho Department of Labor",
15
- "url": "https://www.labor.idaho.gov/dnn/Businesses/Layoff-Assistance#2",
15
+ "url": "https://www.labor.idaho.gov/warnnotice/",
16
16
  }
17
17
 
18
18
  logger = logging.getLogger(__name__)
@@ -32,7 +32,7 @@ def scrape(
32
32
  Returns: the Path where the file is written
33
33
  """
34
34
  # Create the URL of the source PDF
35
- base_url = "https://www.labor.idaho.gov/dnn/Portals/0/Publications/"
35
+ base_url = "https://www.labor.idaho.gov/warnnotice/"
36
36
  file_name = "WARNNotice.pdf"
37
37
  # There's a numeric parameter called v on this PDF URL that updates
38
38
  # from time to time. Suspect this is a cache-buster. We're using a
@@ -40,10 +40,8 @@ def scrape(
40
40
  min_cache_buster = 0
41
41
  max_cache_buster = 10000000000
42
42
  cache_buster = random.randrange(min_cache_buster, max_cache_buster)
43
- url = f"{base_url}{file_name}?v={cache_buster}"
43
+ url = f"{base_url}?v={cache_buster}"
44
44
 
45
- # Download the PDF with verify=False because
46
- # there's a persistent cert error we're working around.
47
45
  cache = Cache(cache_dir)
48
46
  state_code = "id"
49
47
  cache_key = f"{state_code}/{file_name}"
@@ -126,9 +124,9 @@ def filter_garbage_rows(incoming: list):
126
124
  badrows += 1
127
125
  if badrows == 0:
128
126
  logger.debug("No bad rows found.")
129
- logger.debug(
130
- f"!!!!! {badrows:,} bad rows dropped from the data set with insufficient number of fields."
131
- )
127
+ logger.debug(
128
+ f"!!!!! {badrows:,} bad rows dropped from the data set with insufficient number of fields."
129
+ )
132
130
  return outgoing
133
131
 
134
132
 
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: warn-scraper
3
- Version: 1.2.73
3
+ Version: 1.2.75
4
4
  Summary: Command-line interface for downloading WARN Act notices of qualified plant closings and mass layoffs from state government websites
5
5
  Home-page: https://github.com/biglocalnews/warn-scraper
6
6
  Author: Big Local News
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes