warn-scraper 1.2.83__tar.gz → 1.2.85__tar.gz

This diff shows the changes between two publicly available versions of a package, as released to one of the supported registries. It is provided for informational purposes only and reflects the package versions as they appear in their respective public registries.
Files changed (135)
  1. {warn_scraper-1.2.83/warn_scraper.egg-info → warn_scraper-1.2.85}/PKG-INFO +1 -1
  2. {warn_scraper-1.2.83 → warn_scraper-1.2.85}/warn/scrapers/md.py +37 -9
  3. {warn_scraper-1.2.83 → warn_scraper-1.2.85}/warn/scrapers/tx.py +4 -2
  4. {warn_scraper-1.2.83 → warn_scraper-1.2.85/warn_scraper.egg-info}/PKG-INFO +1 -1
  5. {warn_scraper-1.2.83 → warn_scraper-1.2.85}/.devcontainer/devcontainer.json +0 -0
  6. {warn_scraper-1.2.83 → warn_scraper-1.2.85}/.github/dependabot.yml.disabled +0 -0
  7. {warn_scraper-1.2.83 → warn_scraper-1.2.85}/.github/workflows/continuous-deployment.yml +0 -0
  8. {warn_scraper-1.2.83 → warn_scraper-1.2.85}/.gitignore +0 -0
  9. {warn_scraper-1.2.83 → warn_scraper-1.2.85}/.pre-commit-config.yaml +0 -0
  10. {warn_scraper-1.2.83 → warn_scraper-1.2.85}/LICENSE +0 -0
  11. {warn_scraper-1.2.83 → warn_scraper-1.2.85}/MANIFEST.in +0 -0
  12. {warn_scraper-1.2.83 → warn_scraper-1.2.85}/Makefile +0 -0
  13. {warn_scraper-1.2.83 → warn_scraper-1.2.85}/Pipfile +0 -0
  14. {warn_scraper-1.2.83 → warn_scraper-1.2.85}/Pipfile.lock +0 -0
  15. {warn_scraper-1.2.83 → warn_scraper-1.2.85}/README.md +0 -0
  16. {warn_scraper-1.2.83 → warn_scraper-1.2.85}/docs/Makefile +0 -0
  17. {warn_scraper-1.2.83 → warn_scraper-1.2.85}/docs/_static/R42693.pdf +0 -0
  18. {warn_scraper-1.2.83 → warn_scraper-1.2.85}/docs/_static/gao-03-1003.pdf +0 -0
  19. {warn_scraper-1.2.83 → warn_scraper-1.2.85}/docs/_static/releasing-actions-finished.png +0 -0
  20. {warn_scraper-1.2.83 → warn_scraper-1.2.85}/docs/_static/releasing-actions-start.png +0 -0
  21. {warn_scraper-1.2.83 → warn_scraper-1.2.85}/docs/_static/releasing-changelog-button.png +0 -0
  22. {warn_scraper-1.2.83 → warn_scraper-1.2.85}/docs/_static/releasing-changelog-entered.png +0 -0
  23. {warn_scraper-1.2.83 → warn_scraper-1.2.85}/docs/_static/releasing-draft-button.png +0 -0
  24. {warn_scraper-1.2.83 → warn_scraper-1.2.85}/docs/_static/releasing-name-release.png +0 -0
  25. {warn_scraper-1.2.83 → warn_scraper-1.2.85}/docs/_static/releasing-name-tag.png +0 -0
  26. {warn_scraper-1.2.83 → warn_scraper-1.2.85}/docs/_static/releasing-publish-button.png +0 -0
  27. {warn_scraper-1.2.83 → warn_scraper-1.2.85}/docs/_static/releasing-pypi.png +0 -0
  28. {warn_scraper-1.2.83 → warn_scraper-1.2.85}/docs/_static/releasing-release-published.png +0 -0
  29. {warn_scraper-1.2.83 → warn_scraper-1.2.85}/docs/_static/releasing-releases-button.png +0 -0
  30. {warn_scraper-1.2.83 → warn_scraper-1.2.85}/docs/_static/releasing-tag-button.png +0 -0
  31. {warn_scraper-1.2.83 → warn_scraper-1.2.85}/docs/_templates/sources.md.tmpl +0 -0
  32. {warn_scraper-1.2.83 → warn_scraper-1.2.85}/docs/conf.py +0 -0
  33. {warn_scraper-1.2.83 → warn_scraper-1.2.85}/docs/contributing.rst +0 -0
  34. {warn_scraper-1.2.83 → warn_scraper-1.2.85}/docs/index.rst +0 -0
  35. {warn_scraper-1.2.83 → warn_scraper-1.2.85}/docs/make.bat +0 -0
  36. {warn_scraper-1.2.83 → warn_scraper-1.2.85}/docs/reference.rst +0 -0
  37. {warn_scraper-1.2.83 → warn_scraper-1.2.85}/docs/releasing.md +0 -0
  38. {warn_scraper-1.2.83 → warn_scraper-1.2.85}/docs/requirements.txt +0 -0
  39. {warn_scraper-1.2.83 → warn_scraper-1.2.85}/docs/scrapers/al.md +0 -0
  40. {warn_scraper-1.2.83 → warn_scraper-1.2.85}/docs/scrapers/az.md +0 -0
  41. {warn_scraper-1.2.83 → warn_scraper-1.2.85}/docs/scrapers/ca.md +0 -0
  42. {warn_scraper-1.2.83 → warn_scraper-1.2.85}/docs/scrapers/co.md +0 -0
  43. {warn_scraper-1.2.83 → warn_scraper-1.2.85}/docs/scrapers/dc.md +0 -0
  44. {warn_scraper-1.2.83 → warn_scraper-1.2.85}/docs/scrapers/de.md +0 -0
  45. {warn_scraper-1.2.83 → warn_scraper-1.2.85}/docs/scrapers/ia.md +0 -0
  46. {warn_scraper-1.2.83 → warn_scraper-1.2.85}/docs/scrapers/in.md +0 -0
  47. {warn_scraper-1.2.83 → warn_scraper-1.2.85}/docs/scrapers/job_center.md +0 -0
  48. {warn_scraper-1.2.83 → warn_scraper-1.2.85}/docs/scrapers/ks.md +0 -0
  49. {warn_scraper-1.2.83 → warn_scraper-1.2.85}/docs/scrapers/md.md +0 -0
  50. {warn_scraper-1.2.83 → warn_scraper-1.2.85}/docs/scrapers/me.md +0 -0
  51. {warn_scraper-1.2.83 → warn_scraper-1.2.85}/docs/scrapers/mo.md +0 -0
  52. {warn_scraper-1.2.83 → warn_scraper-1.2.85}/docs/scrapers/ny.md +0 -0
  53. {warn_scraper-1.2.83 → warn_scraper-1.2.85}/docs/scrapers/ok.md +0 -0
  54. {warn_scraper-1.2.83 → warn_scraper-1.2.85}/docs/scrapers/or.md +0 -0
  55. {warn_scraper-1.2.83 → warn_scraper-1.2.85}/docs/scrapers/sc.md +0 -0
  56. {warn_scraper-1.2.83 → warn_scraper-1.2.85}/docs/scrapers/tx.md +0 -0
  57. {warn_scraper-1.2.83 → warn_scraper-1.2.85}/docs/scrapers/ut.md +0 -0
  58. {warn_scraper-1.2.83 → warn_scraper-1.2.85}/docs/scrapers/va.md +0 -0
  59. {warn_scraper-1.2.83 → warn_scraper-1.2.85}/docs/scrapers/vt.md +0 -0
  60. {warn_scraper-1.2.83 → warn_scraper-1.2.85}/docs/scrapers/wi.md +0 -0
  61. {warn_scraper-1.2.83 → warn_scraper-1.2.85}/docs/sources.md +0 -0
  62. {warn_scraper-1.2.83 → warn_scraper-1.2.85}/docs/usage.md +0 -0
  63. {warn_scraper-1.2.83 → warn_scraper-1.2.85}/setup.cfg +0 -0
  64. {warn_scraper-1.2.83 → warn_scraper-1.2.85}/setup.py +0 -0
  65. {warn_scraper-1.2.83 → warn_scraper-1.2.85}/tests/__init__.py +0 -0
  66. {warn_scraper-1.2.83 → warn_scraper-1.2.85}/tests/cassettes/test_cached_detail_pages.yaml +0 -0
  67. {warn_scraper-1.2.83 → warn_scraper-1.2.85}/tests/cassettes/test_cached_search_results.yaml +0 -0
  68. {warn_scraper-1.2.83 → warn_scraper-1.2.85}/tests/cassettes/test_missing_detail_page_values.yaml +0 -0
  69. {warn_scraper-1.2.83 → warn_scraper-1.2.85}/tests/cassettes/test_no_results.yaml +0 -0
  70. {warn_scraper-1.2.83 → warn_scraper-1.2.85}/tests/cassettes/test_paged_results.yaml +0 -0
  71. {warn_scraper-1.2.83 → warn_scraper-1.2.85}/tests/cassettes/test_scrape_integration.yaml +0 -0
  72. {warn_scraper-1.2.83 → warn_scraper-1.2.85}/tests/conftest.py +0 -0
  73. {warn_scraper-1.2.83 → warn_scraper-1.2.85}/tests/fixtures/2021_page_1.html +0 -0
  74. {warn_scraper-1.2.83 → warn_scraper-1.2.85}/tests/fixtures/2021_page_2.html +0 -0
  75. {warn_scraper-1.2.83 → warn_scraper-1.2.85}/tests/test_cache.py +0 -0
  76. {warn_scraper-1.2.83 → warn_scraper-1.2.85}/tests/test_delete.py +0 -0
  77. {warn_scraper-1.2.83 → warn_scraper-1.2.85}/tests/test_job_center.py +0 -0
  78. {warn_scraper-1.2.83 → warn_scraper-1.2.85}/tests/test_job_center_cache.py +0 -0
  79. {warn_scraper-1.2.83 → warn_scraper-1.2.85}/tests/test_openpyxl.py +0 -0
  80. {warn_scraper-1.2.83 → warn_scraper-1.2.85}/warn/__init__.py +0 -0
  81. {warn_scraper-1.2.83 → warn_scraper-1.2.85}/warn/cache.py +0 -0
  82. {warn_scraper-1.2.83 → warn_scraper-1.2.85}/warn/cli.py +0 -0
  83. {warn_scraper-1.2.83 → warn_scraper-1.2.85}/warn/platforms/__init__.py +0 -0
  84. {warn_scraper-1.2.83 → warn_scraper-1.2.85}/warn/platforms/job_center/__init__.py +0 -0
  85. {warn_scraper-1.2.83 → warn_scraper-1.2.85}/warn/platforms/job_center/cache.py +0 -0
  86. {warn_scraper-1.2.83 → warn_scraper-1.2.85}/warn/platforms/job_center/site.py +0 -0
  87. {warn_scraper-1.2.83 → warn_scraper-1.2.85}/warn/platforms/job_center/urls.py +0 -0
  88. {warn_scraper-1.2.83 → warn_scraper-1.2.85}/warn/platforms/job_center/utils.py +0 -0
  89. {warn_scraper-1.2.83 → warn_scraper-1.2.85}/warn/runner.py +0 -0
  90. {warn_scraper-1.2.83 → warn_scraper-1.2.85}/warn/scrapers/__init__.py +0 -0
  91. {warn_scraper-1.2.83 → warn_scraper-1.2.85}/warn/scrapers/ak.py +0 -0
  92. {warn_scraper-1.2.83 → warn_scraper-1.2.85}/warn/scrapers/al.py +0 -0
  93. {warn_scraper-1.2.83 → warn_scraper-1.2.85}/warn/scrapers/az.py +0 -0
  94. {warn_scraper-1.2.83 → warn_scraper-1.2.85}/warn/scrapers/ca.py +0 -0
  95. {warn_scraper-1.2.83 → warn_scraper-1.2.85}/warn/scrapers/co.py +0 -0
  96. {warn_scraper-1.2.83 → warn_scraper-1.2.85}/warn/scrapers/ct.py +0 -0
  97. {warn_scraper-1.2.83 → warn_scraper-1.2.85}/warn/scrapers/dc.py +0 -0
  98. {warn_scraper-1.2.83 → warn_scraper-1.2.85}/warn/scrapers/de.py +0 -0
  99. {warn_scraper-1.2.83 → warn_scraper-1.2.85}/warn/scrapers/fl.py +0 -0
  100. {warn_scraper-1.2.83 → warn_scraper-1.2.85}/warn/scrapers/ga.py +0 -0
  101. {warn_scraper-1.2.83 → warn_scraper-1.2.85}/warn/scrapers/hi.py +0 -0
  102. {warn_scraper-1.2.83 → warn_scraper-1.2.85}/warn/scrapers/ia.py +0 -0
  103. {warn_scraper-1.2.83 → warn_scraper-1.2.85}/warn/scrapers/id.py +0 -0
  104. {warn_scraper-1.2.83 → warn_scraper-1.2.85}/warn/scrapers/il.py +0 -0
  105. {warn_scraper-1.2.83 → warn_scraper-1.2.85}/warn/scrapers/in.py +0 -0
  106. {warn_scraper-1.2.83 → warn_scraper-1.2.85}/warn/scrapers/ks.py +0 -0
  107. {warn_scraper-1.2.83 → warn_scraper-1.2.85}/warn/scrapers/ky.py +0 -0
  108. {warn_scraper-1.2.83 → warn_scraper-1.2.85}/warn/scrapers/la.py +0 -0
  109. {warn_scraper-1.2.83 → warn_scraper-1.2.85}/warn/scrapers/me.py +0 -0
  110. {warn_scraper-1.2.83 → warn_scraper-1.2.85}/warn/scrapers/mi.py +0 -0
  111. {warn_scraper-1.2.83 → warn_scraper-1.2.85}/warn/scrapers/mo.py +0 -0
  112. {warn_scraper-1.2.83 → warn_scraper-1.2.85}/warn/scrapers/mt.py +0 -0
  113. {warn_scraper-1.2.83 → warn_scraper-1.2.85}/warn/scrapers/ne.py +0 -0
  114. {warn_scraper-1.2.83 → warn_scraper-1.2.85}/warn/scrapers/nj.py +0 -0
  115. {warn_scraper-1.2.83 → warn_scraper-1.2.85}/warn/scrapers/nm.py +0 -0
  116. {warn_scraper-1.2.83 → warn_scraper-1.2.85}/warn/scrapers/ny.py +0 -0
  117. {warn_scraper-1.2.83 → warn_scraper-1.2.85}/warn/scrapers/oh.py +0 -0
  118. {warn_scraper-1.2.83 → warn_scraper-1.2.85}/warn/scrapers/ok.py +0 -0
  119. {warn_scraper-1.2.83 → warn_scraper-1.2.85}/warn/scrapers/or.py +0 -0
  120. {warn_scraper-1.2.83 → warn_scraper-1.2.85}/warn/scrapers/ri.py +0 -0
  121. {warn_scraper-1.2.83 → warn_scraper-1.2.85}/warn/scrapers/sc.py +0 -0
  122. {warn_scraper-1.2.83 → warn_scraper-1.2.85}/warn/scrapers/sd.py +0 -0
  123. {warn_scraper-1.2.83 → warn_scraper-1.2.85}/warn/scrapers/tn.py +0 -0
  124. {warn_scraper-1.2.83 → warn_scraper-1.2.85}/warn/scrapers/ut.py +0 -0
  125. {warn_scraper-1.2.83 → warn_scraper-1.2.85}/warn/scrapers/va.py +0 -0
  126. {warn_scraper-1.2.83 → warn_scraper-1.2.85}/warn/scrapers/vt.py +0 -0
  127. {warn_scraper-1.2.83 → warn_scraper-1.2.85}/warn/scrapers/wa.py +0 -0
  128. {warn_scraper-1.2.83 → warn_scraper-1.2.85}/warn/scrapers/wi.py +0 -0
  129. {warn_scraper-1.2.83 → warn_scraper-1.2.85}/warn/utils.py +0 -0
  130. {warn_scraper-1.2.83 → warn_scraper-1.2.85}/warn_scraper.egg-info/SOURCES.txt +0 -0
  131. {warn_scraper-1.2.83 → warn_scraper-1.2.85}/warn_scraper.egg-info/dependency_links.txt +0 -0
  132. {warn_scraper-1.2.83 → warn_scraper-1.2.85}/warn_scraper.egg-info/entry_points.txt +0 -0
  133. {warn_scraper-1.2.83 → warn_scraper-1.2.85}/warn_scraper.egg-info/not-zip-safe +0 -0
  134. {warn_scraper-1.2.83 → warn_scraper-1.2.85}/warn_scraper.egg-info/requires.txt +0 -0
  135. {warn_scraper-1.2.83 → warn_scraper-1.2.85}/warn_scraper.egg-info/top_level.txt +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: warn-scraper
3
- Version: 1.2.83
3
+ Version: 1.2.85
4
4
  Summary: Command-line interface for downloading WARN Act notices of qualified plant closings and mass layoffs from state government websites
5
5
  Home-page: https://github.com/biglocalnews/warn-scraper
6
6
  Author: Big Local News
@@ -36,9 +36,13 @@ def scrape(
36
36
  # Set the cache
37
37
  cache = Cache(cache_dir)
38
38
 
39
+ # In November 2024 Maryland began throwing out many failed connection messages. These two things helped.
40
+ request_headers = {"User-Agent": "BigLocalNews.org"}
41
+ request_verify = False
42
+
39
43
  # Get the page
40
44
  url = "https://www.dllr.state.md.us/employment/warn.shtml"
41
- r = utils.get_url(url)
45
+ r = utils.get_url(url, headers=request_headers, verify=request_verify)
42
46
  r.encoding = "utf-8"
43
47
  html = r.text
44
48
 
@@ -56,17 +60,41 @@ def scrape(
56
60
  html_list = []
57
61
  html_list.append(html) # Save the source HTML for parsing also
58
62
 
63
+ old_pages = [
64
+ "warn2023.shtml",
65
+ "warn2022.shtml",
66
+ "warn2021.shtml",
67
+ "warn2020.shtml",
68
+ "warn2019.shtml",
69
+ "warn2018.shtml",
70
+ "warn2017.shtml",
71
+ "warn2016.shtml",
72
+ "warn2015.shtml",
73
+ "warn2014.shtml",
74
+ "warn2013.shtml",
75
+ "warn2012.shtml",
76
+ "warn2011.shtml",
77
+ "warn2010.shtml",
78
+ ]
79
+
59
80
  for href in href_list:
60
81
  # Request the HTML
61
82
  url = f"https://www.dllr.state.md.us/employment/{href}"
62
- r = utils.get_url(url)
63
- r.encoding = "utf-8"
64
- html = r.text
65
-
66
- # Save it to the cache
67
- cache.write(f"md/{href}.html", html)
68
-
69
- sleep(naptime) # Try to stop blocked connections by being less aggressive
83
+ filename = cache_dir / f"md/{href}.html"
84
+
85
+ if href not in old_pages:
86
+ sleep(naptime) # Try to stop blocked connections by being less aggressive
87
+ r = utils.get_url(url, headers=request_headers, verify=request_verify)
88
+ r.encoding = "utf-8"
89
+ html = r.text
90
+
91
+ # Save it to the cache
92
+ cache.write(filename, html)
93
+ else:
94
+ r = utils.fetch_if_not_cached(
95
+ filename, url, headers=request_headers, verify=request_verify
96
+ )
97
+ html = cache.read(filename)
70
98
 
71
99
  # Add it to the list
72
100
  html_list.append(html)
@@ -31,12 +31,14 @@ def scrape(
31
31
 
32
32
  Returns: the Path where the file is written
33
33
  """
34
+ ssl_verify = False # Problems with certificates in November 2024
35
+
34
36
  # Set up the cache
35
37
  cache = Cache(cache_dir)
36
38
 
37
39
  # Get the root URL
38
40
  url = "https://www.twc.texas.gov/data-reports/warn-notice"
39
- page = utils.get_url(url)
41
+ page = utils.get_url(url, verify=ssl_verify)
40
42
  html = page.text
41
43
 
42
44
  # Cache it
@@ -68,7 +70,7 @@ def scrape(
68
70
  # download the excel file
69
71
  year = _get_year(href)
70
72
  ext = _get_ext(href)
71
- excel_path = cache.download(f"tx/{year}{ext}", data_url)
73
+ excel_path = cache.download(f"tx/{year}{ext}", data_url, verify=ssl_verify)
72
74
 
73
75
  # Open it up
74
76
  workbook = load_workbook(filename=excel_path)
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: warn-scraper
3
- Version: 1.2.83
3
+ Version: 1.2.85
4
4
  Summary: Command-line interface for downloading WARN Act notices of qualified plant closings and mass layoffs from state government websites
5
5
  Home-page: https://github.com/biglocalnews/warn-scraper
6
6
  Author: Big Local News
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes