warn-scraper 1.2.146.dev0__py3-none-any.whl → 1.2.149.dev0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- warn/scrapers/mi.py +1 -0
- warn/scrapers/nm.py +5 -1
- warn/scrapers/ny.py +8 -0
- warn/scrapers/oh.py +2 -0
- {warn_scraper-1.2.146.dev0.dist-info → warn_scraper-1.2.149.dev0.dist-info}/METADATA +1 -1
- {warn_scraper-1.2.146.dev0.dist-info → warn_scraper-1.2.149.dev0.dist-info}/RECORD +10 -10
- {warn_scraper-1.2.146.dev0.dist-info → warn_scraper-1.2.149.dev0.dist-info}/WHEEL +0 -0
- {warn_scraper-1.2.146.dev0.dist-info → warn_scraper-1.2.149.dev0.dist-info}/entry_points.txt +0 -0
- {warn_scraper-1.2.146.dev0.dist-info → warn_scraper-1.2.149.dev0.dist-info}/licenses/LICENSE +0 -0
- {warn_scraper-1.2.146.dev0.dist-info → warn_scraper-1.2.149.dev0.dist-info}/top_level.txt +0 -0
warn/scrapers/mi.py
CHANGED
warn/scrapers/nm.py
CHANGED
|
@@ -52,11 +52,15 @@ def scrape(
|
|
|
52
52
|
document = BeautifulSoup(html, "html.parser")
|
|
53
53
|
links = document.find_all("a")
|
|
54
54
|
pdf_urls = [
|
|
55
|
-
f"{
|
|
55
|
+
f"{link['href']}"
|
|
56
56
|
for link in links
|
|
57
57
|
if "WARN" in link.get("href", "") and link.get("href", "").endswith(".pdf")
|
|
58
58
|
]
|
|
59
59
|
|
|
60
|
+
for i, pdf_url in enumerate(pdf_urls):
|
|
61
|
+
if "http" not in pdf_url:
|
|
62
|
+
pdf_urls[i] = base_url + pdf_url
|
|
63
|
+
|
|
60
64
|
output_rows = []
|
|
61
65
|
|
|
62
66
|
for pdf_index, pdf_url in enumerate(pdf_urls):
|
warn/scrapers/ny.py
CHANGED
|
@@ -41,6 +41,14 @@ def scrape(
|
|
|
41
41
|
Append .csv to the end of that URL:
|
|
42
42
|
https://public.tableau.com/app/profile/kylee.teague2482/viz/WorkerAdjustmentRetrainingNotificationWARN/WARN.csv
|
|
43
43
|
Try it in requests, no good. Try it in browser again. File downloads. Find it in the downloads section of the browser. Right-click, copy download link, try that in requests and ... it worked?
|
|
44
|
+
|
|
45
|
+
In 2026, the previous implementation broke. .csv appending gets a 403.
|
|
46
|
+
Wondering if we should be parsing this:
|
|
47
|
+
https://public.tableau.com/workbooks/WorkerAdjustmentRetrainingNotificationWARN.twb
|
|
48
|
+
using the Tableau API
|
|
49
|
+
https://github.com/tableau/document-api-python
|
|
50
|
+
|
|
51
|
+
|
|
44
52
|
"""
|
|
45
53
|
|
|
46
54
|
url = "https://public.tableau.com/views/WorkerAdjustmentRetrainingNotificationWARN/WARN.csv?%3Adisplay_static_image=y&%3AbootstrapWhenNotified=true&%3Aembed=true&%3Alanguage=en-US&:embed=y&:showVizHome=n&:apiID=host0#navType=0&navSrc=Parse"
|
warn/scrapers/oh.py
CHANGED
|
@@ -49,6 +49,8 @@ def scrape(
|
|
|
49
49
|
if isinstance(data_div, Tag):
|
|
50
50
|
data = json.loads(data_div.decode_contents().strip())["data"]
|
|
51
51
|
else:
|
|
52
|
+
logger.debug("!!!!!!!!!!!!!!!!!!!! Could not find JSON data div")
|
|
53
|
+
logger.debug(soup)
|
|
52
54
|
raise ValueError("Could not find JSON data div")
|
|
53
55
|
rawheaders = data[1]
|
|
54
56
|
logger.debug(f"Found headers: {rawheaders}")
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: warn-scraper
|
|
3
|
-
Version: 1.2.146.dev0
|
|
3
|
+
Version: 1.2.149.dev0
|
|
4
4
|
Summary: Command-line interface for downloading WARN Act notices of qualified plant closings and mass layoffs from state government websites
|
|
5
5
|
Author-email: Big Local News <biglocalnews@stanford.edu>
|
|
6
6
|
License-Expression: Apache-2.0
|
|
@@ -30,14 +30,14 @@ warn/scrapers/ky.py,sha256=7kJTNOzxChyXlcyBImmdwwmrczYksU8XNxbhQ2owmJs,9688
|
|
|
30
30
|
warn/scrapers/la.py,sha256=ORkMOQErl33SEiagOli4agDLdTt0R1MxxBmqOg3hNv8,13175
|
|
31
31
|
warn/scrapers/md.py,sha256=hwgxXQnhyBWm8qF1dvxIThAX1MkrZbXLwRI9inO5t8g,4060
|
|
32
32
|
warn/scrapers/me.py,sha256=q36F4yJ7hvZsLayA3uBS1romo4X3Qf-sEi2Y7LAQCi8,1172
|
|
33
|
-
warn/scrapers/mi.py,sha256=
|
|
33
|
+
warn/scrapers/mi.py,sha256=n-fgU44FkpFeWJ0Fis-pQR75AyFMGJqrEc93x2SIphc,5805
|
|
34
34
|
warn/scrapers/mo.py,sha256=wnnwQAiVPwuheMqptMXZpyQdiKNghhKwTO-Bnh9oXoU,3492
|
|
35
35
|
warn/scrapers/mt.py,sha256=t2MP4OCcuCEnrnvNgOu289P0eekZq4XaCK65qzgZX88,2457
|
|
36
36
|
warn/scrapers/ne.py,sha256=JawuGJ3tCKvMd-N-p03gnltB4rol4QUJshMk2oyMPO4,4143
|
|
37
37
|
warn/scrapers/nj.py,sha256=nwbMbeQuUJbYRVoyUyKZBmNqvqsXu3Habt-10r8DvZE,2230
|
|
38
|
-
warn/scrapers/nm.py,sha256=
|
|
39
|
-
warn/scrapers/ny.py,sha256=
|
|
40
|
-
warn/scrapers/oh.py,sha256
|
|
38
|
+
warn/scrapers/nm.py,sha256=55kDh65uOc-uMO8wuxPHhHN4Aw8Di2Fx66JBds8f2Io,3694
|
|
39
|
+
warn/scrapers/ny.py,sha256=_nYoawIde4yYtk8-tMG3hekLlSlrKKqULmkBWOUoQJM,2586
|
|
40
|
+
warn/scrapers/oh.py,sha256=-_MyuaUGyKgd-XiXMVnf6AHkqOBgAFg0NRRpXGLz-3A,3431
|
|
41
41
|
warn/scrapers/ok.py,sha256=ZZciyR1jPS4SzS2JSQwhJsDXP_VxA9UkEQvLpxzWzp4,7676
|
|
42
42
|
warn/scrapers/or.py,sha256=0PjyrW3CHdxtHhqEo3Ob-9B6YckACoBD3K0c4FPQUcg,5208
|
|
43
43
|
warn/scrapers/pa.py,sha256=j6cYZjKJUht2s829VPAbI2IRJazkqOwbwcVMAcItuIc,5410
|
|
@@ -51,9 +51,9 @@ warn/scrapers/va.py,sha256=7Nle7qL0VNPiE653XyaP9HQqSfuJFDRr2kEkjOqLvFM,11269
|
|
|
51
51
|
warn/scrapers/vt.py,sha256=d-bo4WK2hkrk4BhCCmLpEovcoZltlvdIUB6O0uaMx5A,1186
|
|
52
52
|
warn/scrapers/wa.py,sha256=UXdVtHZo_a-XfoiyOooTRfTb9W3PErSZdKca6SRORgs,4282
|
|
53
53
|
warn/scrapers/wi.py,sha256=ClEzXkwZbop0W4fkQgsb5oHAPUrb4luUPGV-jOKwkcg,4855
|
|
54
|
-
warn_scraper-1.2.
|
|
55
|
-
warn_scraper-1.2.
|
|
56
|
-
warn_scraper-1.2.
|
|
57
|
-
warn_scraper-1.2.
|
|
58
|
-
warn_scraper-1.2.
|
|
59
|
-
warn_scraper-1.2.
|
|
54
|
+
warn_scraper-1.2.149.dev0.dist-info/licenses/LICENSE,sha256=ZV-QHyqPwyMuwuj0lI05JeSjV1NyzVEk8Yeu7FPtYS0,585
|
|
55
|
+
warn_scraper-1.2.149.dev0.dist-info/METADATA,sha256=VQt1KDfh3oqeCaGriI-MZd5C_0QuXHnOTbCiYhQV42k,1780
|
|
56
|
+
warn_scraper-1.2.149.dev0.dist-info/WHEEL,sha256=aeYiig01lYGDzBgS8HxWXOg3uV61G9ijOsup-k9o1sk,91
|
|
57
|
+
warn_scraper-1.2.149.dev0.dist-info/entry_points.txt,sha256=poh_oSweObGlBSs1_2qZmnTodlOYD0KfO7-h7W2UQIw,47
|
|
58
|
+
warn_scraper-1.2.149.dev0.dist-info/top_level.txt,sha256=dZfms6N3kqVXufiPOo7YqOrAcUtYfNH_oyGvYUk9FB4,5
|
|
59
|
+
warn_scraper-1.2.149.dev0.dist-info/RECORD,,
|
|
File without changes
|
{warn_scraper-1.2.146.dev0.dist-info → warn_scraper-1.2.149.dev0.dist-info}/entry_points.txt
RENAMED
|
File without changes
|
{warn_scraper-1.2.146.dev0.dist-info → warn_scraper-1.2.149.dev0.dist-info}/licenses/LICENSE
RENAMED
|
File without changes
|
|
File without changes
|