warn-scraper 1.2.80__tar.gz → 1.2.82__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (135)
  1. {warn_scraper-1.2.80/warn_scraper.egg-info → warn_scraper-1.2.82}/PKG-INFO +1 -1
  2. {warn_scraper-1.2.80 → warn_scraper-1.2.82}/warn/scrapers/md.py +7 -0
  3. {warn_scraper-1.2.80 → warn_scraper-1.2.82}/warn/scrapers/ri.py +9 -3
  4. {warn_scraper-1.2.80 → warn_scraper-1.2.82/warn_scraper.egg-info}/PKG-INFO +1 -1
  5. {warn_scraper-1.2.80 → warn_scraper-1.2.82}/.devcontainer/devcontainer.json +0 -0
  6. {warn_scraper-1.2.80 → warn_scraper-1.2.82}/.github/dependabot.yml.disabled +0 -0
  7. {warn_scraper-1.2.80 → warn_scraper-1.2.82}/.github/workflows/continuous-deployment.yml +0 -0
  8. {warn_scraper-1.2.80 → warn_scraper-1.2.82}/.gitignore +0 -0
  9. {warn_scraper-1.2.80 → warn_scraper-1.2.82}/.pre-commit-config.yaml +0 -0
  10. {warn_scraper-1.2.80 → warn_scraper-1.2.82}/LICENSE +0 -0
  11. {warn_scraper-1.2.80 → warn_scraper-1.2.82}/MANIFEST.in +0 -0
  12. {warn_scraper-1.2.80 → warn_scraper-1.2.82}/Makefile +0 -0
  13. {warn_scraper-1.2.80 → warn_scraper-1.2.82}/Pipfile +0 -0
  14. {warn_scraper-1.2.80 → warn_scraper-1.2.82}/Pipfile.lock +0 -0
  15. {warn_scraper-1.2.80 → warn_scraper-1.2.82}/README.md +0 -0
  16. {warn_scraper-1.2.80 → warn_scraper-1.2.82}/docs/Makefile +0 -0
  17. {warn_scraper-1.2.80 → warn_scraper-1.2.82}/docs/_static/R42693.pdf +0 -0
  18. {warn_scraper-1.2.80 → warn_scraper-1.2.82}/docs/_static/gao-03-1003.pdf +0 -0
  19. {warn_scraper-1.2.80 → warn_scraper-1.2.82}/docs/_static/releasing-actions-finished.png +0 -0
  20. {warn_scraper-1.2.80 → warn_scraper-1.2.82}/docs/_static/releasing-actions-start.png +0 -0
  21. {warn_scraper-1.2.80 → warn_scraper-1.2.82}/docs/_static/releasing-changelog-button.png +0 -0
  22. {warn_scraper-1.2.80 → warn_scraper-1.2.82}/docs/_static/releasing-changelog-entered.png +0 -0
  23. {warn_scraper-1.2.80 → warn_scraper-1.2.82}/docs/_static/releasing-draft-button.png +0 -0
  24. {warn_scraper-1.2.80 → warn_scraper-1.2.82}/docs/_static/releasing-name-release.png +0 -0
  25. {warn_scraper-1.2.80 → warn_scraper-1.2.82}/docs/_static/releasing-name-tag.png +0 -0
  26. {warn_scraper-1.2.80 → warn_scraper-1.2.82}/docs/_static/releasing-publish-button.png +0 -0
  27. {warn_scraper-1.2.80 → warn_scraper-1.2.82}/docs/_static/releasing-pypi.png +0 -0
  28. {warn_scraper-1.2.80 → warn_scraper-1.2.82}/docs/_static/releasing-release-published.png +0 -0
  29. {warn_scraper-1.2.80 → warn_scraper-1.2.82}/docs/_static/releasing-releases-button.png +0 -0
  30. {warn_scraper-1.2.80 → warn_scraper-1.2.82}/docs/_static/releasing-tag-button.png +0 -0
  31. {warn_scraper-1.2.80 → warn_scraper-1.2.82}/docs/_templates/sources.md.tmpl +0 -0
  32. {warn_scraper-1.2.80 → warn_scraper-1.2.82}/docs/conf.py +0 -0
  33. {warn_scraper-1.2.80 → warn_scraper-1.2.82}/docs/contributing.rst +0 -0
  34. {warn_scraper-1.2.80 → warn_scraper-1.2.82}/docs/index.rst +0 -0
  35. {warn_scraper-1.2.80 → warn_scraper-1.2.82}/docs/make.bat +0 -0
  36. {warn_scraper-1.2.80 → warn_scraper-1.2.82}/docs/reference.rst +0 -0
  37. {warn_scraper-1.2.80 → warn_scraper-1.2.82}/docs/releasing.md +0 -0
  38. {warn_scraper-1.2.80 → warn_scraper-1.2.82}/docs/requirements.txt +0 -0
  39. {warn_scraper-1.2.80 → warn_scraper-1.2.82}/docs/scrapers/al.md +0 -0
  40. {warn_scraper-1.2.80 → warn_scraper-1.2.82}/docs/scrapers/az.md +0 -0
  41. {warn_scraper-1.2.80 → warn_scraper-1.2.82}/docs/scrapers/ca.md +0 -0
  42. {warn_scraper-1.2.80 → warn_scraper-1.2.82}/docs/scrapers/co.md +0 -0
  43. {warn_scraper-1.2.80 → warn_scraper-1.2.82}/docs/scrapers/dc.md +0 -0
  44. {warn_scraper-1.2.80 → warn_scraper-1.2.82}/docs/scrapers/de.md +0 -0
  45. {warn_scraper-1.2.80 → warn_scraper-1.2.82}/docs/scrapers/ia.md +0 -0
  46. {warn_scraper-1.2.80 → warn_scraper-1.2.82}/docs/scrapers/in.md +0 -0
  47. {warn_scraper-1.2.80 → warn_scraper-1.2.82}/docs/scrapers/job_center.md +0 -0
  48. {warn_scraper-1.2.80 → warn_scraper-1.2.82}/docs/scrapers/ks.md +0 -0
  49. {warn_scraper-1.2.80 → warn_scraper-1.2.82}/docs/scrapers/md.md +0 -0
  50. {warn_scraper-1.2.80 → warn_scraper-1.2.82}/docs/scrapers/me.md +0 -0
  51. {warn_scraper-1.2.80 → warn_scraper-1.2.82}/docs/scrapers/mo.md +0 -0
  52. {warn_scraper-1.2.80 → warn_scraper-1.2.82}/docs/scrapers/ny.md +0 -0
  53. {warn_scraper-1.2.80 → warn_scraper-1.2.82}/docs/scrapers/ok.md +0 -0
  54. {warn_scraper-1.2.80 → warn_scraper-1.2.82}/docs/scrapers/or.md +0 -0
  55. {warn_scraper-1.2.80 → warn_scraper-1.2.82}/docs/scrapers/sc.md +0 -0
  56. {warn_scraper-1.2.80 → warn_scraper-1.2.82}/docs/scrapers/tx.md +0 -0
  57. {warn_scraper-1.2.80 → warn_scraper-1.2.82}/docs/scrapers/ut.md +0 -0
  58. {warn_scraper-1.2.80 → warn_scraper-1.2.82}/docs/scrapers/va.md +0 -0
  59. {warn_scraper-1.2.80 → warn_scraper-1.2.82}/docs/scrapers/vt.md +0 -0
  60. {warn_scraper-1.2.80 → warn_scraper-1.2.82}/docs/scrapers/wi.md +0 -0
  61. {warn_scraper-1.2.80 → warn_scraper-1.2.82}/docs/sources.md +0 -0
  62. {warn_scraper-1.2.80 → warn_scraper-1.2.82}/docs/usage.md +0 -0
  63. {warn_scraper-1.2.80 → warn_scraper-1.2.82}/setup.cfg +0 -0
  64. {warn_scraper-1.2.80 → warn_scraper-1.2.82}/setup.py +0 -0
  65. {warn_scraper-1.2.80 → warn_scraper-1.2.82}/tests/__init__.py +0 -0
  66. {warn_scraper-1.2.80 → warn_scraper-1.2.82}/tests/cassettes/test_cached_detail_pages.yaml +0 -0
  67. {warn_scraper-1.2.80 → warn_scraper-1.2.82}/tests/cassettes/test_cached_search_results.yaml +0 -0
  68. {warn_scraper-1.2.80 → warn_scraper-1.2.82}/tests/cassettes/test_missing_detail_page_values.yaml +0 -0
  69. {warn_scraper-1.2.80 → warn_scraper-1.2.82}/tests/cassettes/test_no_results.yaml +0 -0
  70. {warn_scraper-1.2.80 → warn_scraper-1.2.82}/tests/cassettes/test_paged_results.yaml +0 -0
  71. {warn_scraper-1.2.80 → warn_scraper-1.2.82}/tests/cassettes/test_scrape_integration.yaml +0 -0
  72. {warn_scraper-1.2.80 → warn_scraper-1.2.82}/tests/conftest.py +0 -0
  73. {warn_scraper-1.2.80 → warn_scraper-1.2.82}/tests/fixtures/2021_page_1.html +0 -0
  74. {warn_scraper-1.2.80 → warn_scraper-1.2.82}/tests/fixtures/2021_page_2.html +0 -0
  75. {warn_scraper-1.2.80 → warn_scraper-1.2.82}/tests/test_cache.py +0 -0
  76. {warn_scraper-1.2.80 → warn_scraper-1.2.82}/tests/test_delete.py +0 -0
  77. {warn_scraper-1.2.80 → warn_scraper-1.2.82}/tests/test_job_center.py +0 -0
  78. {warn_scraper-1.2.80 → warn_scraper-1.2.82}/tests/test_job_center_cache.py +0 -0
  79. {warn_scraper-1.2.80 → warn_scraper-1.2.82}/tests/test_openpyxl.py +0 -0
  80. {warn_scraper-1.2.80 → warn_scraper-1.2.82}/warn/__init__.py +0 -0
  81. {warn_scraper-1.2.80 → warn_scraper-1.2.82}/warn/cache.py +0 -0
  82. {warn_scraper-1.2.80 → warn_scraper-1.2.82}/warn/cli.py +0 -0
  83. {warn_scraper-1.2.80 → warn_scraper-1.2.82}/warn/platforms/__init__.py +0 -0
  84. {warn_scraper-1.2.80 → warn_scraper-1.2.82}/warn/platforms/job_center/__init__.py +0 -0
  85. {warn_scraper-1.2.80 → warn_scraper-1.2.82}/warn/platforms/job_center/cache.py +0 -0
  86. {warn_scraper-1.2.80 → warn_scraper-1.2.82}/warn/platforms/job_center/site.py +0 -0
  87. {warn_scraper-1.2.80 → warn_scraper-1.2.82}/warn/platforms/job_center/urls.py +0 -0
  88. {warn_scraper-1.2.80 → warn_scraper-1.2.82}/warn/platforms/job_center/utils.py +0 -0
  89. {warn_scraper-1.2.80 → warn_scraper-1.2.82}/warn/runner.py +0 -0
  90. {warn_scraper-1.2.80 → warn_scraper-1.2.82}/warn/scrapers/__init__.py +0 -0
  91. {warn_scraper-1.2.80 → warn_scraper-1.2.82}/warn/scrapers/ak.py +0 -0
  92. {warn_scraper-1.2.80 → warn_scraper-1.2.82}/warn/scrapers/al.py +0 -0
  93. {warn_scraper-1.2.80 → warn_scraper-1.2.82}/warn/scrapers/az.py +0 -0
  94. {warn_scraper-1.2.80 → warn_scraper-1.2.82}/warn/scrapers/ca.py +0 -0
  95. {warn_scraper-1.2.80 → warn_scraper-1.2.82}/warn/scrapers/co.py +0 -0
  96. {warn_scraper-1.2.80 → warn_scraper-1.2.82}/warn/scrapers/ct.py +0 -0
  97. {warn_scraper-1.2.80 → warn_scraper-1.2.82}/warn/scrapers/dc.py +0 -0
  98. {warn_scraper-1.2.80 → warn_scraper-1.2.82}/warn/scrapers/de.py +0 -0
  99. {warn_scraper-1.2.80 → warn_scraper-1.2.82}/warn/scrapers/fl.py +0 -0
  100. {warn_scraper-1.2.80 → warn_scraper-1.2.82}/warn/scrapers/ga.py +0 -0
  101. {warn_scraper-1.2.80 → warn_scraper-1.2.82}/warn/scrapers/hi.py +0 -0
  102. {warn_scraper-1.2.80 → warn_scraper-1.2.82}/warn/scrapers/ia.py +0 -0
  103. {warn_scraper-1.2.80 → warn_scraper-1.2.82}/warn/scrapers/id.py +0 -0
  104. {warn_scraper-1.2.80 → warn_scraper-1.2.82}/warn/scrapers/il.py +0 -0
  105. {warn_scraper-1.2.80 → warn_scraper-1.2.82}/warn/scrapers/in.py +0 -0
  106. {warn_scraper-1.2.80 → warn_scraper-1.2.82}/warn/scrapers/ks.py +0 -0
  107. {warn_scraper-1.2.80 → warn_scraper-1.2.82}/warn/scrapers/ky.py +0 -0
  108. {warn_scraper-1.2.80 → warn_scraper-1.2.82}/warn/scrapers/la.py +0 -0
  109. {warn_scraper-1.2.80 → warn_scraper-1.2.82}/warn/scrapers/me.py +0 -0
  110. {warn_scraper-1.2.80 → warn_scraper-1.2.82}/warn/scrapers/mi.py +0 -0
  111. {warn_scraper-1.2.80 → warn_scraper-1.2.82}/warn/scrapers/mo.py +0 -0
  112. {warn_scraper-1.2.80 → warn_scraper-1.2.82}/warn/scrapers/mt.py +0 -0
  113. {warn_scraper-1.2.80 → warn_scraper-1.2.82}/warn/scrapers/ne.py +0 -0
  114. {warn_scraper-1.2.80 → warn_scraper-1.2.82}/warn/scrapers/nj.py +0 -0
  115. {warn_scraper-1.2.80 → warn_scraper-1.2.82}/warn/scrapers/nm.py +0 -0
  116. {warn_scraper-1.2.80 → warn_scraper-1.2.82}/warn/scrapers/ny.py +0 -0
  117. {warn_scraper-1.2.80 → warn_scraper-1.2.82}/warn/scrapers/oh.py +0 -0
  118. {warn_scraper-1.2.80 → warn_scraper-1.2.82}/warn/scrapers/ok.py +0 -0
  119. {warn_scraper-1.2.80 → warn_scraper-1.2.82}/warn/scrapers/or.py +0 -0
  120. {warn_scraper-1.2.80 → warn_scraper-1.2.82}/warn/scrapers/sc.py +0 -0
  121. {warn_scraper-1.2.80 → warn_scraper-1.2.82}/warn/scrapers/sd.py +0 -0
  122. {warn_scraper-1.2.80 → warn_scraper-1.2.82}/warn/scrapers/tn.py +0 -0
  123. {warn_scraper-1.2.80 → warn_scraper-1.2.82}/warn/scrapers/tx.py +0 -0
  124. {warn_scraper-1.2.80 → warn_scraper-1.2.82}/warn/scrapers/ut.py +0 -0
  125. {warn_scraper-1.2.80 → warn_scraper-1.2.82}/warn/scrapers/va.py +0 -0
  126. {warn_scraper-1.2.80 → warn_scraper-1.2.82}/warn/scrapers/vt.py +0 -0
  127. {warn_scraper-1.2.80 → warn_scraper-1.2.82}/warn/scrapers/wa.py +0 -0
  128. {warn_scraper-1.2.80 → warn_scraper-1.2.82}/warn/scrapers/wi.py +0 -0
  129. {warn_scraper-1.2.80 → warn_scraper-1.2.82}/warn/utils.py +0 -0
  130. {warn_scraper-1.2.80 → warn_scraper-1.2.82}/warn_scraper.egg-info/SOURCES.txt +0 -0
  131. {warn_scraper-1.2.80 → warn_scraper-1.2.82}/warn_scraper.egg-info/dependency_links.txt +0 -0
  132. {warn_scraper-1.2.80 → warn_scraper-1.2.82}/warn_scraper.egg-info/entry_points.txt +0 -0
  133. {warn_scraper-1.2.80 → warn_scraper-1.2.82}/warn_scraper.egg-info/not-zip-safe +0 -0
  134. {warn_scraper-1.2.80 → warn_scraper-1.2.82}/warn_scraper.egg-info/requires.txt +0 -0
  135. {warn_scraper-1.2.80 → warn_scraper-1.2.82}/warn_scraper.egg-info/top_level.txt +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: warn-scraper
3
- Version: 1.2.80
3
+ Version: 1.2.82
4
4
  Summary: Command-line interface for downloading WARN Act notices of qualified plant closings and mass layoffs from state government websites
5
5
  Home-page: https://github.com/biglocalnews/warn-scraper
6
6
  Author: Big Local News
@@ -1,6 +1,7 @@
1
1
  import logging
2
2
  import re
3
3
  from pathlib import Path
4
+ from time import sleep
4
5
 
5
6
  from bs4 import BeautifulSoup
6
7
 
@@ -16,6 +17,8 @@ __source__ = {
16
17
 
17
18
  logger = logging.getLogger(__name__)
18
19
 
20
+ naptime = 3
21
+
19
22
 
20
23
  def scrape(
21
24
  data_dir: Path = utils.WARN_DATA_DIR,
@@ -42,6 +45,8 @@ def scrape(
42
45
  # Save it to the cache
43
46
  cache.write("md/source.html", html)
44
47
 
48
+ sleep(naptime) # Try to stop blocked connections by being less aggressive
49
+
45
50
  # Parse the list of links
46
51
  soup = BeautifulSoup(html, "html.parser")
47
52
  a_list = soup.find_all("a", {"class": "sub"})
@@ -61,6 +66,8 @@ def scrape(
61
66
  # Save it to the cache
62
67
  cache.write(f"md/{href}.html", html)
63
68
 
69
+ sleep(naptime) # Try to stop blocked connections by being less aggressive
70
+
64
71
  # Add it to the list
65
72
  html_list.append(html)
66
73
 
@@ -61,6 +61,7 @@ def scrape(
61
61
  dirty_list.extend(localrows)
62
62
 
63
63
  headers = dirty_list[1] # Skip false header at position 0
64
+ headers = [x for x in headers if x is not None]
64
65
  headers[2] = (
65
66
  headers[2]
66
67
  .replace("Company Name ", "Company Name")
@@ -85,9 +86,14 @@ def scrape(
85
86
  logger.debug(f"Got : {row}")
86
87
  else:
87
88
  line = {}
88
- for i, fieldname in enumerate(headers):
89
- line[fieldname] = row[i]
90
- row_list.append(line)
89
+ if len(headers) > len(row):
90
+ logger.debug(
91
+ f"{len(row)} items found, vs. expected {len(headers)}. Dropping row: {row}"
92
+ )
93
+ else:
94
+ for i, fieldname in enumerate(headers):
95
+ line[fieldname] = row[i]
96
+ row_list.append(line)
91
97
  # dirty_list = None
92
98
  logger.debug(
93
99
  f"Successfully merged {len(row_list)-1:,} records from new spreadsheet."
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: warn-scraper
3
- Version: 1.2.80
3
+ Version: 1.2.82
4
4
  Summary: Command-line interface for downloading WARN Act notices of qualified plant closings and mass layoffs from state government websites
5
5
  Home-page: https://github.com/biglocalnews/warn-scraper
6
6
  Author: Big Local News
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes