warn-scraper 1.2.86__tar.gz → 1.2.88__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (136)
  1. {warn_scraper-1.2.86/warn_scraper.egg-info → warn_scraper-1.2.88}/PKG-INFO +1 -1
  2. {warn_scraper-1.2.86 → warn_scraper-1.2.88}/warn/scrapers/co.py +4 -0
  3. warn_scraper-1.2.88/warn/scrapers/va.py +79 -0
  4. {warn_scraper-1.2.86 → warn_scraper-1.2.88/warn_scraper.egg-info}/PKG-INFO +1 -1
  5. warn_scraper-1.2.86/warn/scrapers/va.py +0 -67
  6. {warn_scraper-1.2.86 → warn_scraper-1.2.88}/.devcontainer/devcontainer.json +0 -0
  7. {warn_scraper-1.2.86 → warn_scraper-1.2.88}/.github/dependabot.yml.disabled +0 -0
  8. {warn_scraper-1.2.86 → warn_scraper-1.2.88}/.github/workflows/continuous-deployment.yml +0 -0
  9. {warn_scraper-1.2.86 → warn_scraper-1.2.88}/.gitignore +0 -0
  10. {warn_scraper-1.2.86 → warn_scraper-1.2.88}/.pre-commit-config.yaml +0 -0
  11. {warn_scraper-1.2.86 → warn_scraper-1.2.88}/LICENSE +0 -0
  12. {warn_scraper-1.2.86 → warn_scraper-1.2.88}/MANIFEST.in +0 -0
  13. {warn_scraper-1.2.86 → warn_scraper-1.2.88}/Makefile +0 -0
  14. {warn_scraper-1.2.86 → warn_scraper-1.2.88}/Pipfile +0 -0
  15. {warn_scraper-1.2.86 → warn_scraper-1.2.88}/Pipfile.lock +0 -0
  16. {warn_scraper-1.2.86 → warn_scraper-1.2.88}/README.md +0 -0
  17. {warn_scraper-1.2.86 → warn_scraper-1.2.88}/docs/Makefile +0 -0
  18. {warn_scraper-1.2.86 → warn_scraper-1.2.88}/docs/_static/R42693.pdf +0 -0
  19. {warn_scraper-1.2.86 → warn_scraper-1.2.88}/docs/_static/gao-03-1003.pdf +0 -0
  20. {warn_scraper-1.2.86 → warn_scraper-1.2.88}/docs/_static/releasing-actions-finished.png +0 -0
  21. {warn_scraper-1.2.86 → warn_scraper-1.2.88}/docs/_static/releasing-actions-start.png +0 -0
  22. {warn_scraper-1.2.86 → warn_scraper-1.2.88}/docs/_static/releasing-changelog-button.png +0 -0
  23. {warn_scraper-1.2.86 → warn_scraper-1.2.88}/docs/_static/releasing-changelog-entered.png +0 -0
  24. {warn_scraper-1.2.86 → warn_scraper-1.2.88}/docs/_static/releasing-draft-button.png +0 -0
  25. {warn_scraper-1.2.86 → warn_scraper-1.2.88}/docs/_static/releasing-name-release.png +0 -0
  26. {warn_scraper-1.2.86 → warn_scraper-1.2.88}/docs/_static/releasing-name-tag.png +0 -0
  27. {warn_scraper-1.2.86 → warn_scraper-1.2.88}/docs/_static/releasing-publish-button.png +0 -0
  28. {warn_scraper-1.2.86 → warn_scraper-1.2.88}/docs/_static/releasing-pypi.png +0 -0
  29. {warn_scraper-1.2.86 → warn_scraper-1.2.88}/docs/_static/releasing-release-published.png +0 -0
  30. {warn_scraper-1.2.86 → warn_scraper-1.2.88}/docs/_static/releasing-releases-button.png +0 -0
  31. {warn_scraper-1.2.86 → warn_scraper-1.2.88}/docs/_static/releasing-tag-button.png +0 -0
  32. {warn_scraper-1.2.86 → warn_scraper-1.2.88}/docs/_templates/sources.md.tmpl +0 -0
  33. {warn_scraper-1.2.86 → warn_scraper-1.2.88}/docs/conf.py +0 -0
  34. {warn_scraper-1.2.86 → warn_scraper-1.2.88}/docs/contributing.rst +0 -0
  35. {warn_scraper-1.2.86 → warn_scraper-1.2.88}/docs/index.rst +0 -0
  36. {warn_scraper-1.2.86 → warn_scraper-1.2.88}/docs/make.bat +0 -0
  37. {warn_scraper-1.2.86 → warn_scraper-1.2.88}/docs/reference.rst +0 -0
  38. {warn_scraper-1.2.86 → warn_scraper-1.2.88}/docs/releasing.md +0 -0
  39. {warn_scraper-1.2.86 → warn_scraper-1.2.88}/docs/requirements.txt +0 -0
  40. {warn_scraper-1.2.86 → warn_scraper-1.2.88}/docs/scrapers/al.md +0 -0
  41. {warn_scraper-1.2.86 → warn_scraper-1.2.88}/docs/scrapers/az.md +0 -0
  42. {warn_scraper-1.2.86 → warn_scraper-1.2.88}/docs/scrapers/ca.md +0 -0
  43. {warn_scraper-1.2.86 → warn_scraper-1.2.88}/docs/scrapers/co.md +0 -0
  44. {warn_scraper-1.2.86 → warn_scraper-1.2.88}/docs/scrapers/dc.md +0 -0
  45. {warn_scraper-1.2.86 → warn_scraper-1.2.88}/docs/scrapers/de.md +0 -0
  46. {warn_scraper-1.2.86 → warn_scraper-1.2.88}/docs/scrapers/ia.md +0 -0
  47. {warn_scraper-1.2.86 → warn_scraper-1.2.88}/docs/scrapers/in.md +0 -0
  48. {warn_scraper-1.2.86 → warn_scraper-1.2.88}/docs/scrapers/job_center.md +0 -0
  49. {warn_scraper-1.2.86 → warn_scraper-1.2.88}/docs/scrapers/ks.md +0 -0
  50. {warn_scraper-1.2.86 → warn_scraper-1.2.88}/docs/scrapers/md.md +0 -0
  51. {warn_scraper-1.2.86 → warn_scraper-1.2.88}/docs/scrapers/me.md +0 -0
  52. {warn_scraper-1.2.86 → warn_scraper-1.2.88}/docs/scrapers/mo.md +0 -0
  53. {warn_scraper-1.2.86 → warn_scraper-1.2.88}/docs/scrapers/ny.md +0 -0
  54. {warn_scraper-1.2.86 → warn_scraper-1.2.88}/docs/scrapers/ok.md +0 -0
  55. {warn_scraper-1.2.86 → warn_scraper-1.2.88}/docs/scrapers/or.md +0 -0
  56. {warn_scraper-1.2.86 → warn_scraper-1.2.88}/docs/scrapers/sc.md +0 -0
  57. {warn_scraper-1.2.86 → warn_scraper-1.2.88}/docs/scrapers/tx.md +0 -0
  58. {warn_scraper-1.2.86 → warn_scraper-1.2.88}/docs/scrapers/ut.md +0 -0
  59. {warn_scraper-1.2.86 → warn_scraper-1.2.88}/docs/scrapers/va.md +0 -0
  60. {warn_scraper-1.2.86 → warn_scraper-1.2.88}/docs/scrapers/vt.md +0 -0
  61. {warn_scraper-1.2.86 → warn_scraper-1.2.88}/docs/scrapers/wi.md +0 -0
  62. {warn_scraper-1.2.86 → warn_scraper-1.2.88}/docs/sources.md +0 -0
  63. {warn_scraper-1.2.86 → warn_scraper-1.2.88}/docs/usage.md +0 -0
  64. {warn_scraper-1.2.86 → warn_scraper-1.2.88}/setup.cfg +0 -0
  65. {warn_scraper-1.2.86 → warn_scraper-1.2.88}/setup.py +0 -0
  66. {warn_scraper-1.2.86 → warn_scraper-1.2.88}/tests/__init__.py +0 -0
  67. {warn_scraper-1.2.86 → warn_scraper-1.2.88}/tests/cassettes/test_cached_detail_pages.yaml +0 -0
  68. {warn_scraper-1.2.86 → warn_scraper-1.2.88}/tests/cassettes/test_cached_search_results.yaml +0 -0
  69. {warn_scraper-1.2.86 → warn_scraper-1.2.88}/tests/cassettes/test_missing_detail_page_values.yaml +0 -0
  70. {warn_scraper-1.2.86 → warn_scraper-1.2.88}/tests/cassettes/test_no_results.yaml +0 -0
  71. {warn_scraper-1.2.86 → warn_scraper-1.2.88}/tests/cassettes/test_paged_results.yaml +0 -0
  72. {warn_scraper-1.2.86 → warn_scraper-1.2.88}/tests/cassettes/test_scrape_integration.yaml +0 -0
  73. {warn_scraper-1.2.86 → warn_scraper-1.2.88}/tests/conftest.py +0 -0
  74. {warn_scraper-1.2.86 → warn_scraper-1.2.88}/tests/fixtures/2021_page_1.html +0 -0
  75. {warn_scraper-1.2.86 → warn_scraper-1.2.88}/tests/fixtures/2021_page_2.html +0 -0
  76. {warn_scraper-1.2.86 → warn_scraper-1.2.88}/tests/test_cache.py +0 -0
  77. {warn_scraper-1.2.86 → warn_scraper-1.2.88}/tests/test_delete.py +0 -0
  78. {warn_scraper-1.2.86 → warn_scraper-1.2.88}/tests/test_job_center.py +0 -0
  79. {warn_scraper-1.2.86 → warn_scraper-1.2.88}/tests/test_job_center_cache.py +0 -0
  80. {warn_scraper-1.2.86 → warn_scraper-1.2.88}/tests/test_openpyxl.py +0 -0
  81. {warn_scraper-1.2.86 → warn_scraper-1.2.88}/warn/__init__.py +0 -0
  82. {warn_scraper-1.2.86 → warn_scraper-1.2.88}/warn/cache.py +0 -0
  83. {warn_scraper-1.2.86 → warn_scraper-1.2.88}/warn/cli.py +0 -0
  84. {warn_scraper-1.2.86 → warn_scraper-1.2.88}/warn/platforms/__init__.py +0 -0
  85. {warn_scraper-1.2.86 → warn_scraper-1.2.88}/warn/platforms/job_center/__init__.py +0 -0
  86. {warn_scraper-1.2.86 → warn_scraper-1.2.88}/warn/platforms/job_center/cache.py +0 -0
  87. {warn_scraper-1.2.86 → warn_scraper-1.2.88}/warn/platforms/job_center/site.py +0 -0
  88. {warn_scraper-1.2.86 → warn_scraper-1.2.88}/warn/platforms/job_center/urls.py +0 -0
  89. {warn_scraper-1.2.86 → warn_scraper-1.2.88}/warn/platforms/job_center/utils.py +0 -0
  90. {warn_scraper-1.2.86 → warn_scraper-1.2.88}/warn/runner.py +0 -0
  91. {warn_scraper-1.2.86 → warn_scraper-1.2.88}/warn/scrapers/__init__.py +0 -0
  92. {warn_scraper-1.2.86 → warn_scraper-1.2.88}/warn/scrapers/ak.py +0 -0
  93. {warn_scraper-1.2.86 → warn_scraper-1.2.88}/warn/scrapers/al.py +0 -0
  94. {warn_scraper-1.2.86 → warn_scraper-1.2.88}/warn/scrapers/az.py +0 -0
  95. {warn_scraper-1.2.86 → warn_scraper-1.2.88}/warn/scrapers/ca.py +0 -0
  96. {warn_scraper-1.2.86 → warn_scraper-1.2.88}/warn/scrapers/ct.py +0 -0
  97. {warn_scraper-1.2.86 → warn_scraper-1.2.88}/warn/scrapers/dc.py +0 -0
  98. {warn_scraper-1.2.86 → warn_scraper-1.2.88}/warn/scrapers/de.py +0 -0
  99. {warn_scraper-1.2.86 → warn_scraper-1.2.88}/warn/scrapers/fl.py +0 -0
  100. {warn_scraper-1.2.86 → warn_scraper-1.2.88}/warn/scrapers/ga.py +0 -0
  101. {warn_scraper-1.2.86 → warn_scraper-1.2.88}/warn/scrapers/hi.py +0 -0
  102. {warn_scraper-1.2.86 → warn_scraper-1.2.88}/warn/scrapers/ia.py +0 -0
  103. {warn_scraper-1.2.86 → warn_scraper-1.2.88}/warn/scrapers/id.py +0 -0
  104. {warn_scraper-1.2.86 → warn_scraper-1.2.88}/warn/scrapers/il.py +0 -0
  105. {warn_scraper-1.2.86 → warn_scraper-1.2.88}/warn/scrapers/in.py +0 -0
  106. {warn_scraper-1.2.86 → warn_scraper-1.2.88}/warn/scrapers/ks.py +0 -0
  107. {warn_scraper-1.2.86 → warn_scraper-1.2.88}/warn/scrapers/ky.py +0 -0
  108. {warn_scraper-1.2.86 → warn_scraper-1.2.88}/warn/scrapers/la.py +0 -0
  109. {warn_scraper-1.2.86 → warn_scraper-1.2.88}/warn/scrapers/md.py +0 -0
  110. {warn_scraper-1.2.86 → warn_scraper-1.2.88}/warn/scrapers/me.py +0 -0
  111. {warn_scraper-1.2.86 → warn_scraper-1.2.88}/warn/scrapers/mi.py +0 -0
  112. {warn_scraper-1.2.86 → warn_scraper-1.2.88}/warn/scrapers/mo.py +0 -0
  113. {warn_scraper-1.2.86 → warn_scraper-1.2.88}/warn/scrapers/mt.py +0 -0
  114. {warn_scraper-1.2.86 → warn_scraper-1.2.88}/warn/scrapers/ne.py +0 -0
  115. {warn_scraper-1.2.86 → warn_scraper-1.2.88}/warn/scrapers/nj.py +0 -0
  116. {warn_scraper-1.2.86 → warn_scraper-1.2.88}/warn/scrapers/nm.py +0 -0
  117. {warn_scraper-1.2.86 → warn_scraper-1.2.88}/warn/scrapers/ny.py +0 -0
  118. {warn_scraper-1.2.86 → warn_scraper-1.2.88}/warn/scrapers/oh.py +0 -0
  119. {warn_scraper-1.2.86 → warn_scraper-1.2.88}/warn/scrapers/ok.py +0 -0
  120. {warn_scraper-1.2.86 → warn_scraper-1.2.88}/warn/scrapers/or.py +0 -0
  121. {warn_scraper-1.2.86 → warn_scraper-1.2.88}/warn/scrapers/ri.py +0 -0
  122. {warn_scraper-1.2.86 → warn_scraper-1.2.88}/warn/scrapers/sc.py +0 -0
  123. {warn_scraper-1.2.86 → warn_scraper-1.2.88}/warn/scrapers/sd.py +0 -0
  124. {warn_scraper-1.2.86 → warn_scraper-1.2.88}/warn/scrapers/tn.py +0 -0
  125. {warn_scraper-1.2.86 → warn_scraper-1.2.88}/warn/scrapers/tx.py +0 -0
  126. {warn_scraper-1.2.86 → warn_scraper-1.2.88}/warn/scrapers/ut.py +0 -0
  127. {warn_scraper-1.2.86 → warn_scraper-1.2.88}/warn/scrapers/vt.py +0 -0
  128. {warn_scraper-1.2.86 → warn_scraper-1.2.88}/warn/scrapers/wa.py +0 -0
  129. {warn_scraper-1.2.86 → warn_scraper-1.2.88}/warn/scrapers/wi.py +0 -0
  130. {warn_scraper-1.2.86 → warn_scraper-1.2.88}/warn/utils.py +0 -0
  131. {warn_scraper-1.2.86 → warn_scraper-1.2.88}/warn_scraper.egg-info/SOURCES.txt +0 -0
  132. {warn_scraper-1.2.86 → warn_scraper-1.2.88}/warn_scraper.egg-info/dependency_links.txt +0 -0
  133. {warn_scraper-1.2.86 → warn_scraper-1.2.88}/warn_scraper.egg-info/entry_points.txt +0 -0
  134. {warn_scraper-1.2.86 → warn_scraper-1.2.88}/warn_scraper.egg-info/not-zip-safe +0 -0
  135. {warn_scraper-1.2.86 → warn_scraper-1.2.88}/warn_scraper.egg-info/requires.txt +0 -0
  136. {warn_scraper-1.2.86 → warn_scraper-1.2.88}/warn_scraper.egg-info/top_level.txt +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: warn-scraper
3
- Version: 1.2.86
3
+ Version: 1.2.88
4
4
  Summary: Command-line interface for downloading WARN Act notices of qualified plant closings and mass layoffs from state government websites
5
5
  Home-page: https://github.com/biglocalnews/warn-scraper
6
6
  Author: Big Local News
@@ -123,7 +123,11 @@ def scrape(
123
123
  "Occupations Impacted": "occupations",
124
124
  "Occupations": "occupations",
125
125
  "Select the workforce area": "workforce_area",
126
+ "Total CO": "jobs",
127
+ "CO Layoffs": "jobs",
126
128
  "Total number of permanent layoffs": "permanent_job_losses",
129
+ "# permanent": "permanent_job_losses",
130
+ "# Permanent": "permanent_job_losses",
127
131
  "Total number of temporary layoffs": "temporary_job_losses",
128
132
  "Total number of furloughs": "furloughs",
129
133
  "Begin date of layoffs": "begin_date",
@@ -0,0 +1,79 @@
1
+ import logging
2
+ from pathlib import Path
3
+
4
+ from .. import utils
5
+ from ..cache import Cache
6
+
7
+ # from bs4 import BeautifulSoup, Tag
8
+
9
+
10
+ __authors__ = ["zstumgoren", "Dilcia19", "shallotly"]
11
+ __tags__ = ["html", "csv"]
12
+ __source__ = {
13
+ "name": "Virginia Employment Commission",
14
+ "url": "https://www.vec.virginia.gov/warn-notices",
15
+ }
16
+
17
+ logger = logging.getLogger(__name__)
18
+
19
+
20
+ def scrape(
21
+ data_dir: Path = utils.WARN_DATA_DIR,
22
+ cache_dir: Path = utils.WARN_CACHE_DIR,
23
+ ) -> Path:
24
+ """
25
+ Scrape data from Virginia.
26
+
27
+ Keyword arguments:
28
+ data_dir -- the Path where the result will be saved (default WARN_DATA_DIR)
29
+ cache_dir -- the Path where results can be cached (default WARN_CACHE_DIR)
30
+
31
+ Returns: the Path where the file is written
32
+ """
33
+ # This scraper initially tried to get a CSV download link that was only for the most recent entries. The scraping part of that broke.
34
+ # It's now hard-coded to a particular download link with parameters that should get the full thing.
35
+
36
+ # This may break again, but this revised attempt has far fewer moving parts and actually fetches the complete data set.
37
+ # Blame Stucka in December 2024.
38
+
39
+ # Get the WARN page
40
+ # url = "https://www.vec.virginia.gov/warn-notices"
41
+ # url = "https://vec.virginia.gov/warn-notices?field_notice_date_value%5Bmin%5D%5Bdate%5D=1%2F1%2F1990&field_notice_date_value%5Bmax%5D%5Bdate%5D=&field_region_warn_tid=All"
42
+ # r = utils.get_url(url, verify=True)
43
+ # html = r.text
44
+
45
+ # Save it to the cache
46
+ cache = Cache(cache_dir)
47
+ # cache.write("va/source.html", html)
48
+
49
+ # Parse out the CSV download link
50
+ # soup = BeautifulSoup(html, "html.parser")
51
+ # csv_link = soup.find("a", text="Download")
52
+ # if isinstance(csv_link, Tag):
53
+ # csv_href = csv_link["href"]
54
+ # else:
55
+ # raise ValueError("Could not find CSV link")
56
+
57
+ # csv_href = "/warn-notices-csv.csv?"
58
+ # csv_url = f"https://www.vec.virginia.gov{csv_href}"
59
+
60
+ csv_url = "https://vec.virginia.gov/warn-notices-csv.csv?field_notice_date_value%5Bmin%5D%5Bdate%5D=1%2F1%2F1990&field_notice_date_value%5Bmax%5D%5Bdate%5D=&field_region_warn_tid=All"
61
+
62
+ # Download it to the cache
63
+ cache.download("va/source.csv", csv_url, verify=True)
64
+
65
+ # Open it up as a list of rows
66
+ csv_rows = cache.read_csv("va/source.csv")
67
+
68
+ # Set the export path
69
+ data_path = data_dir / "va.csv"
70
+
71
+ # Write out the file
72
+ utils.write_rows_to_csv(data_path, csv_rows)
73
+
74
+ # Return the export path
75
+ return data_path
76
+
77
+
78
+ if __name__ == "__main__":
79
+ scrape()
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: warn-scraper
3
- Version: 1.2.86
3
+ Version: 1.2.88
4
4
  Summary: Command-line interface for downloading WARN Act notices of qualified plant closings and mass layoffs from state government websites
5
5
  Home-page: https://github.com/biglocalnews/warn-scraper
6
6
  Author: Big Local News
@@ -1,67 +0,0 @@
1
- import logging
2
- from pathlib import Path
3
-
4
- from bs4 import BeautifulSoup, Tag
5
-
6
- from .. import utils
7
- from ..cache import Cache
8
-
9
- __authors__ = ["zstumgoren", "Dilcia19", "shallotly"]
10
- __tags__ = ["html", "csv"]
11
- __source__ = {
12
- "name": "Virginia Employment Commission",
13
- "url": "https://www.vec.virginia.gov/warn-notices",
14
- }
15
-
16
- logger = logging.getLogger(__name__)
17
-
18
-
19
- def scrape(
20
- data_dir: Path = utils.WARN_DATA_DIR,
21
- cache_dir: Path = utils.WARN_CACHE_DIR,
22
- ) -> Path:
23
- """
24
- Scrape data from Virginia.
25
-
26
- Keyword arguments:
27
- data_dir -- the Path where the result will be saved (default WARN_DATA_DIR)
28
- cache_dir -- the Path where results can be cached (default WARN_CACHE_DIR)
29
-
30
- Returns: the Path where the file is written
31
- """
32
- # Get the WARN page
33
- url = "https://www.vec.virginia.gov/warn-notices"
34
- r = utils.get_url(url, verify=False)
35
- html = r.text
36
-
37
- # Save it to the cache
38
- cache = Cache(cache_dir)
39
- cache.write("va/source.html", html)
40
-
41
- # Parse out the CSV download link
42
- soup = BeautifulSoup(html, "html.parser")
43
- csv_link = soup.find("a", text="Download")
44
- if isinstance(csv_link, Tag):
45
- csv_href = csv_link["href"]
46
- else:
47
- raise ValueError("Could not find CSV link")
48
- csv_url = f"https://www.vec.virginia.gov{csv_href}"
49
-
50
- # Download it to the cache
51
- cache.download("va/source.csv", csv_url, verify=False)
52
-
53
- # Open it up as a list of rows
54
- csv_rows = cache.read_csv("va/source.csv")
55
-
56
- # Set the export path
57
- data_path = data_dir / "va.csv"
58
-
59
- # Write out the file
60
- utils.write_rows_to_csv(data_path, csv_rows)
61
-
62
- # Return the export path
63
- return data_path
64
-
65
-
66
- if __name__ == "__main__":
67
- scrape()
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes