warn-scraper 1.2.82__tar.gz → 1.2.84__tar.gz

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.
Files changed (135)
  1. {warn_scraper-1.2.82/warn_scraper.egg-info → warn_scraper-1.2.84}/PKG-INFO +2 -2
  2. {warn_scraper-1.2.82 → warn_scraper-1.2.84}/setup.py +1 -1
  3. {warn_scraper-1.2.82 → warn_scraper-1.2.84}/warn/scrapers/md.py +37 -9
  4. {warn_scraper-1.2.82 → warn_scraper-1.2.84/warn_scraper.egg-info}/PKG-INFO +2 -2
  5. {warn_scraper-1.2.82 → warn_scraper-1.2.84}/warn_scraper.egg-info/requires.txt +1 -1
  6. {warn_scraper-1.2.82 → warn_scraper-1.2.84}/.devcontainer/devcontainer.json +0 -0
  7. {warn_scraper-1.2.82 → warn_scraper-1.2.84}/.github/dependabot.yml.disabled +0 -0
  8. {warn_scraper-1.2.82 → warn_scraper-1.2.84}/.github/workflows/continuous-deployment.yml +0 -0
  9. {warn_scraper-1.2.82 → warn_scraper-1.2.84}/.gitignore +0 -0
  10. {warn_scraper-1.2.82 → warn_scraper-1.2.84}/.pre-commit-config.yaml +0 -0
  11. {warn_scraper-1.2.82 → warn_scraper-1.2.84}/LICENSE +0 -0
  12. {warn_scraper-1.2.82 → warn_scraper-1.2.84}/MANIFEST.in +0 -0
  13. {warn_scraper-1.2.82 → warn_scraper-1.2.84}/Makefile +0 -0
  14. {warn_scraper-1.2.82 → warn_scraper-1.2.84}/Pipfile +0 -0
  15. {warn_scraper-1.2.82 → warn_scraper-1.2.84}/Pipfile.lock +0 -0
  16. {warn_scraper-1.2.82 → warn_scraper-1.2.84}/README.md +0 -0
  17. {warn_scraper-1.2.82 → warn_scraper-1.2.84}/docs/Makefile +0 -0
  18. {warn_scraper-1.2.82 → warn_scraper-1.2.84}/docs/_static/R42693.pdf +0 -0
  19. {warn_scraper-1.2.82 → warn_scraper-1.2.84}/docs/_static/gao-03-1003.pdf +0 -0
  20. {warn_scraper-1.2.82 → warn_scraper-1.2.84}/docs/_static/releasing-actions-finished.png +0 -0
  21. {warn_scraper-1.2.82 → warn_scraper-1.2.84}/docs/_static/releasing-actions-start.png +0 -0
  22. {warn_scraper-1.2.82 → warn_scraper-1.2.84}/docs/_static/releasing-changelog-button.png +0 -0
  23. {warn_scraper-1.2.82 → warn_scraper-1.2.84}/docs/_static/releasing-changelog-entered.png +0 -0
  24. {warn_scraper-1.2.82 → warn_scraper-1.2.84}/docs/_static/releasing-draft-button.png +0 -0
  25. {warn_scraper-1.2.82 → warn_scraper-1.2.84}/docs/_static/releasing-name-release.png +0 -0
  26. {warn_scraper-1.2.82 → warn_scraper-1.2.84}/docs/_static/releasing-name-tag.png +0 -0
  27. {warn_scraper-1.2.82 → warn_scraper-1.2.84}/docs/_static/releasing-publish-button.png +0 -0
  28. {warn_scraper-1.2.82 → warn_scraper-1.2.84}/docs/_static/releasing-pypi.png +0 -0
  29. {warn_scraper-1.2.82 → warn_scraper-1.2.84}/docs/_static/releasing-release-published.png +0 -0
  30. {warn_scraper-1.2.82 → warn_scraper-1.2.84}/docs/_static/releasing-releases-button.png +0 -0
  31. {warn_scraper-1.2.82 → warn_scraper-1.2.84}/docs/_static/releasing-tag-button.png +0 -0
  32. {warn_scraper-1.2.82 → warn_scraper-1.2.84}/docs/_templates/sources.md.tmpl +0 -0
  33. {warn_scraper-1.2.82 → warn_scraper-1.2.84}/docs/conf.py +0 -0
  34. {warn_scraper-1.2.82 → warn_scraper-1.2.84}/docs/contributing.rst +0 -0
  35. {warn_scraper-1.2.82 → warn_scraper-1.2.84}/docs/index.rst +0 -0
  36. {warn_scraper-1.2.82 → warn_scraper-1.2.84}/docs/make.bat +0 -0
  37. {warn_scraper-1.2.82 → warn_scraper-1.2.84}/docs/reference.rst +0 -0
  38. {warn_scraper-1.2.82 → warn_scraper-1.2.84}/docs/releasing.md +0 -0
  39. {warn_scraper-1.2.82 → warn_scraper-1.2.84}/docs/requirements.txt +0 -0
  40. {warn_scraper-1.2.82 → warn_scraper-1.2.84}/docs/scrapers/al.md +0 -0
  41. {warn_scraper-1.2.82 → warn_scraper-1.2.84}/docs/scrapers/az.md +0 -0
  42. {warn_scraper-1.2.82 → warn_scraper-1.2.84}/docs/scrapers/ca.md +0 -0
  43. {warn_scraper-1.2.82 → warn_scraper-1.2.84}/docs/scrapers/co.md +0 -0
  44. {warn_scraper-1.2.82 → warn_scraper-1.2.84}/docs/scrapers/dc.md +0 -0
  45. {warn_scraper-1.2.82 → warn_scraper-1.2.84}/docs/scrapers/de.md +0 -0
  46. {warn_scraper-1.2.82 → warn_scraper-1.2.84}/docs/scrapers/ia.md +0 -0
  47. {warn_scraper-1.2.82 → warn_scraper-1.2.84}/docs/scrapers/in.md +0 -0
  48. {warn_scraper-1.2.82 → warn_scraper-1.2.84}/docs/scrapers/job_center.md +0 -0
  49. {warn_scraper-1.2.82 → warn_scraper-1.2.84}/docs/scrapers/ks.md +0 -0
  50. {warn_scraper-1.2.82 → warn_scraper-1.2.84}/docs/scrapers/md.md +0 -0
  51. {warn_scraper-1.2.82 → warn_scraper-1.2.84}/docs/scrapers/me.md +0 -0
  52. {warn_scraper-1.2.82 → warn_scraper-1.2.84}/docs/scrapers/mo.md +0 -0
  53. {warn_scraper-1.2.82 → warn_scraper-1.2.84}/docs/scrapers/ny.md +0 -0
  54. {warn_scraper-1.2.82 → warn_scraper-1.2.84}/docs/scrapers/ok.md +0 -0
  55. {warn_scraper-1.2.82 → warn_scraper-1.2.84}/docs/scrapers/or.md +0 -0
  56. {warn_scraper-1.2.82 → warn_scraper-1.2.84}/docs/scrapers/sc.md +0 -0
  57. {warn_scraper-1.2.82 → warn_scraper-1.2.84}/docs/scrapers/tx.md +0 -0
  58. {warn_scraper-1.2.82 → warn_scraper-1.2.84}/docs/scrapers/ut.md +0 -0
  59. {warn_scraper-1.2.82 → warn_scraper-1.2.84}/docs/scrapers/va.md +0 -0
  60. {warn_scraper-1.2.82 → warn_scraper-1.2.84}/docs/scrapers/vt.md +0 -0
  61. {warn_scraper-1.2.82 → warn_scraper-1.2.84}/docs/scrapers/wi.md +0 -0
  62. {warn_scraper-1.2.82 → warn_scraper-1.2.84}/docs/sources.md +0 -0
  63. {warn_scraper-1.2.82 → warn_scraper-1.2.84}/docs/usage.md +0 -0
  64. {warn_scraper-1.2.82 → warn_scraper-1.2.84}/setup.cfg +0 -0
  65. {warn_scraper-1.2.82 → warn_scraper-1.2.84}/tests/__init__.py +0 -0
  66. {warn_scraper-1.2.82 → warn_scraper-1.2.84}/tests/cassettes/test_cached_detail_pages.yaml +0 -0
  67. {warn_scraper-1.2.82 → warn_scraper-1.2.84}/tests/cassettes/test_cached_search_results.yaml +0 -0
  68. {warn_scraper-1.2.82 → warn_scraper-1.2.84}/tests/cassettes/test_missing_detail_page_values.yaml +0 -0
  69. {warn_scraper-1.2.82 → warn_scraper-1.2.84}/tests/cassettes/test_no_results.yaml +0 -0
  70. {warn_scraper-1.2.82 → warn_scraper-1.2.84}/tests/cassettes/test_paged_results.yaml +0 -0
  71. {warn_scraper-1.2.82 → warn_scraper-1.2.84}/tests/cassettes/test_scrape_integration.yaml +0 -0
  72. {warn_scraper-1.2.82 → warn_scraper-1.2.84}/tests/conftest.py +0 -0
  73. {warn_scraper-1.2.82 → warn_scraper-1.2.84}/tests/fixtures/2021_page_1.html +0 -0
  74. {warn_scraper-1.2.82 → warn_scraper-1.2.84}/tests/fixtures/2021_page_2.html +0 -0
  75. {warn_scraper-1.2.82 → warn_scraper-1.2.84}/tests/test_cache.py +0 -0
  76. {warn_scraper-1.2.82 → warn_scraper-1.2.84}/tests/test_delete.py +0 -0
  77. {warn_scraper-1.2.82 → warn_scraper-1.2.84}/tests/test_job_center.py +0 -0
  78. {warn_scraper-1.2.82 → warn_scraper-1.2.84}/tests/test_job_center_cache.py +0 -0
  79. {warn_scraper-1.2.82 → warn_scraper-1.2.84}/tests/test_openpyxl.py +0 -0
  80. {warn_scraper-1.2.82 → warn_scraper-1.2.84}/warn/__init__.py +0 -0
  81. {warn_scraper-1.2.82 → warn_scraper-1.2.84}/warn/cache.py +0 -0
  82. {warn_scraper-1.2.82 → warn_scraper-1.2.84}/warn/cli.py +0 -0
  83. {warn_scraper-1.2.82 → warn_scraper-1.2.84}/warn/platforms/__init__.py +0 -0
  84. {warn_scraper-1.2.82 → warn_scraper-1.2.84}/warn/platforms/job_center/__init__.py +0 -0
  85. {warn_scraper-1.2.82 → warn_scraper-1.2.84}/warn/platforms/job_center/cache.py +0 -0
  86. {warn_scraper-1.2.82 → warn_scraper-1.2.84}/warn/platforms/job_center/site.py +0 -0
  87. {warn_scraper-1.2.82 → warn_scraper-1.2.84}/warn/platforms/job_center/urls.py +0 -0
  88. {warn_scraper-1.2.82 → warn_scraper-1.2.84}/warn/platforms/job_center/utils.py +0 -0
  89. {warn_scraper-1.2.82 → warn_scraper-1.2.84}/warn/runner.py +0 -0
  90. {warn_scraper-1.2.82 → warn_scraper-1.2.84}/warn/scrapers/__init__.py +0 -0
  91. {warn_scraper-1.2.82 → warn_scraper-1.2.84}/warn/scrapers/ak.py +0 -0
  92. {warn_scraper-1.2.82 → warn_scraper-1.2.84}/warn/scrapers/al.py +0 -0
  93. {warn_scraper-1.2.82 → warn_scraper-1.2.84}/warn/scrapers/az.py +0 -0
  94. {warn_scraper-1.2.82 → warn_scraper-1.2.84}/warn/scrapers/ca.py +0 -0
  95. {warn_scraper-1.2.82 → warn_scraper-1.2.84}/warn/scrapers/co.py +0 -0
  96. {warn_scraper-1.2.82 → warn_scraper-1.2.84}/warn/scrapers/ct.py +0 -0
  97. {warn_scraper-1.2.82 → warn_scraper-1.2.84}/warn/scrapers/dc.py +0 -0
  98. {warn_scraper-1.2.82 → warn_scraper-1.2.84}/warn/scrapers/de.py +0 -0
  99. {warn_scraper-1.2.82 → warn_scraper-1.2.84}/warn/scrapers/fl.py +0 -0
  100. {warn_scraper-1.2.82 → warn_scraper-1.2.84}/warn/scrapers/ga.py +0 -0
  101. {warn_scraper-1.2.82 → warn_scraper-1.2.84}/warn/scrapers/hi.py +0 -0
  102. {warn_scraper-1.2.82 → warn_scraper-1.2.84}/warn/scrapers/ia.py +0 -0
  103. {warn_scraper-1.2.82 → warn_scraper-1.2.84}/warn/scrapers/id.py +0 -0
  104. {warn_scraper-1.2.82 → warn_scraper-1.2.84}/warn/scrapers/il.py +0 -0
  105. {warn_scraper-1.2.82 → warn_scraper-1.2.84}/warn/scrapers/in.py +0 -0
  106. {warn_scraper-1.2.82 → warn_scraper-1.2.84}/warn/scrapers/ks.py +0 -0
  107. {warn_scraper-1.2.82 → warn_scraper-1.2.84}/warn/scrapers/ky.py +0 -0
  108. {warn_scraper-1.2.82 → warn_scraper-1.2.84}/warn/scrapers/la.py +0 -0
  109. {warn_scraper-1.2.82 → warn_scraper-1.2.84}/warn/scrapers/me.py +0 -0
  110. {warn_scraper-1.2.82 → warn_scraper-1.2.84}/warn/scrapers/mi.py +0 -0
  111. {warn_scraper-1.2.82 → warn_scraper-1.2.84}/warn/scrapers/mo.py +0 -0
  112. {warn_scraper-1.2.82 → warn_scraper-1.2.84}/warn/scrapers/mt.py +0 -0
  113. {warn_scraper-1.2.82 → warn_scraper-1.2.84}/warn/scrapers/ne.py +0 -0
  114. {warn_scraper-1.2.82 → warn_scraper-1.2.84}/warn/scrapers/nj.py +0 -0
  115. {warn_scraper-1.2.82 → warn_scraper-1.2.84}/warn/scrapers/nm.py +0 -0
  116. {warn_scraper-1.2.82 → warn_scraper-1.2.84}/warn/scrapers/ny.py +0 -0
  117. {warn_scraper-1.2.82 → warn_scraper-1.2.84}/warn/scrapers/oh.py +0 -0
  118. {warn_scraper-1.2.82 → warn_scraper-1.2.84}/warn/scrapers/ok.py +0 -0
  119. {warn_scraper-1.2.82 → warn_scraper-1.2.84}/warn/scrapers/or.py +0 -0
  120. {warn_scraper-1.2.82 → warn_scraper-1.2.84}/warn/scrapers/ri.py +0 -0
  121. {warn_scraper-1.2.82 → warn_scraper-1.2.84}/warn/scrapers/sc.py +0 -0
  122. {warn_scraper-1.2.82 → warn_scraper-1.2.84}/warn/scrapers/sd.py +0 -0
  123. {warn_scraper-1.2.82 → warn_scraper-1.2.84}/warn/scrapers/tn.py +0 -0
  124. {warn_scraper-1.2.82 → warn_scraper-1.2.84}/warn/scrapers/tx.py +0 -0
  125. {warn_scraper-1.2.82 → warn_scraper-1.2.84}/warn/scrapers/ut.py +0 -0
  126. {warn_scraper-1.2.82 → warn_scraper-1.2.84}/warn/scrapers/va.py +0 -0
  127. {warn_scraper-1.2.82 → warn_scraper-1.2.84}/warn/scrapers/vt.py +0 -0
  128. {warn_scraper-1.2.82 → warn_scraper-1.2.84}/warn/scrapers/wa.py +0 -0
  129. {warn_scraper-1.2.82 → warn_scraper-1.2.84}/warn/scrapers/wi.py +0 -0
  130. {warn_scraper-1.2.82 → warn_scraper-1.2.84}/warn/utils.py +0 -0
  131. {warn_scraper-1.2.82 → warn_scraper-1.2.84}/warn_scraper.egg-info/SOURCES.txt +0 -0
  132. {warn_scraper-1.2.82 → warn_scraper-1.2.84}/warn_scraper.egg-info/dependency_links.txt +0 -0
  133. {warn_scraper-1.2.82 → warn_scraper-1.2.84}/warn_scraper.egg-info/entry_points.txt +0 -0
  134. {warn_scraper-1.2.82 → warn_scraper-1.2.84}/warn_scraper.egg-info/not-zip-safe +0 -0
  135. {warn_scraper-1.2.82 → warn_scraper-1.2.84}/warn_scraper.egg-info/top_level.txt +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: warn-scraper
3
- Version: 1.2.82
3
+ Version: 1.2.84
4
4
  Summary: Command-line interface for downloading WARN Act notices of qualified plant closings and mass layoffs from state government websites
5
5
  Home-page: https://github.com/biglocalnews/warn-scraper
6
6
  Author: Big Local News
@@ -23,7 +23,7 @@ Classifier: Programming Language :: Python :: 3.10
23
23
  Description-Content-Type: text/markdown
24
24
  License-File: LICENSE
25
25
  Requires-Dist: click
26
- Requires-Dist: bs4
26
+ Requires-Dist: beautifulsoup4
27
27
  Requires-Dist: html5lib
28
28
  Requires-Dist: pdfplumber
29
29
  Requires-Dist: requests
@@ -128,7 +128,7 @@ setup(
128
128
  """,
129
129
  install_requires=[
130
130
  "click",
131
- "bs4",
131
+ "beautifulsoup4",
132
132
  "html5lib",
133
133
  "pdfplumber",
134
134
  "requests",
@@ -36,9 +36,13 @@ def scrape(
36
36
  # Set the cache
37
37
  cache = Cache(cache_dir)
38
38
 
39
+ # In November 2024 Maryland began throwing out many failed connection messages. These two things helped.
40
+ request_headers = {"User-Agent": "BigLocalNews.org"}
41
+ request_verify = False
42
+
39
43
  # Get the page
40
44
  url = "https://www.dllr.state.md.us/employment/warn.shtml"
41
- r = utils.get_url(url)
45
+ r = utils.get_url(url, headers=request_headers, verify=request_verify)
42
46
  r.encoding = "utf-8"
43
47
  html = r.text
44
48
 
@@ -56,17 +60,41 @@ def scrape(
56
60
  html_list = []
57
61
  html_list.append(html) # Save the source HTML for parsing also
58
62
 
63
+ old_pages = [
64
+ "warn2023.shtml",
65
+ "warn2022.shtml",
66
+ "warn2021.shtml",
67
+ "warn2020.shtml",
68
+ "warn2019.shtml",
69
+ "warn2018.shtml",
70
+ "warn2017.shtml",
71
+ "warn2016.shtml",
72
+ "warn2015.shtml",
73
+ "warn2014.shtml",
74
+ "warn2013.shtml",
75
+ "warn2012.shtml",
76
+ "warn2011.shtml",
77
+ "warn2010.shtml",
78
+ ]
79
+
59
80
  for href in href_list:
60
81
  # Request the HTML
61
82
  url = f"https://www.dllr.state.md.us/employment/{href}"
62
- r = utils.get_url(url)
63
- r.encoding = "utf-8"
64
- html = r.text
65
-
66
- # Save it to the cache
67
- cache.write(f"md/{href}.html", html)
68
-
69
- sleep(naptime) # Try to stop blocked connections by being less aggressive
83
+ filename = cache_dir / f"md/{href}.html"
84
+
85
+ if href not in old_pages:
86
+ sleep(naptime) # Try to stop blocked connections by being less aggressive
87
+ r = utils.get_url(url, headers=request_headers, verify=request_verify)
88
+ r.encoding = "utf-8"
89
+ html = r.text
90
+
91
+ # Save it to the cache
92
+ cache.write(filename, html)
93
+ else:
94
+ r = utils.fetch_if_not_cached(
95
+ filename, url, headers=request_headers, verify=request_verify
96
+ )
97
+ html = cache.read(filename)
70
98
 
71
99
  # Add it to the list
72
100
  html_list.append(html)
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: warn-scraper
3
- Version: 1.2.82
3
+ Version: 1.2.84
4
4
  Summary: Command-line interface for downloading WARN Act notices of qualified plant closings and mass layoffs from state government websites
5
5
  Home-page: https://github.com/biglocalnews/warn-scraper
6
6
  Author: Big Local News
@@ -23,7 +23,7 @@ Classifier: Programming Language :: Python :: 3.10
23
23
  Description-Content-Type: text/markdown
24
24
  License-File: LICENSE
25
25
  Requires-Dist: click
26
- Requires-Dist: bs4
26
+ Requires-Dist: beautifulsoup4
27
27
  Requires-Dist: html5lib
28
28
  Requires-Dist: pdfplumber
29
29
  Requires-Dist: requests
@@ -1,5 +1,5 @@
1
1
  click
2
- bs4
2
+ beautifulsoup4
3
3
  html5lib
4
4
  pdfplumber
5
5
  requests
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes