warn-scraper 1.2.56__py3-none-any.whl → 1.2.57__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- warn/scrapers/hi.py +9 -6
- {warn_scraper-1.2.56.dist-info → warn_scraper-1.2.57.dist-info}/METADATA +1 -1
- {warn_scraper-1.2.56.dist-info → warn_scraper-1.2.57.dist-info}/RECORD +7 -7
- {warn_scraper-1.2.56.dist-info → warn_scraper-1.2.57.dist-info}/LICENSE +0 -0
- {warn_scraper-1.2.56.dist-info → warn_scraper-1.2.57.dist-info}/WHEEL +0 -0
- {warn_scraper-1.2.56.dist-info → warn_scraper-1.2.57.dist-info}/entry_points.txt +0 -0
- {warn_scraper-1.2.56.dist-info → warn_scraper-1.2.57.dist-info}/top_level.txt +0 -0
warn/scrapers/hi.py
CHANGED
@@ -1,13 +1,14 @@
|
|
1
1
|
import datetime
|
2
2
|
import logging
|
3
3
|
from pathlib import Path
|
4
|
+
from urllib.parse import quote
|
4
5
|
|
5
6
|
from bs4 import BeautifulSoup
|
6
7
|
|
7
8
|
from .. import utils
|
8
9
|
|
9
10
|
__authors__ = ["Ash1R", "stucka"]
|
10
|
-
__tags__ = ["html"]
|
11
|
+
__tags__ = ["html", "pdf"]
|
11
12
|
__source__ = {
|
12
13
|
"name": "Workforce Development Hawaii",
|
13
14
|
"url": "https://labor.hawaii.gov/wdc/real-time-warn-updates/",
|
@@ -28,15 +29,17 @@ def scrape(
|
|
28
29
|
cache_dir -- the Path where results can be cached (default WARN_CACHE_DIR)
|
29
30
|
Returns: the Path where the file is written
|
30
31
|
"""
|
31
|
-
|
32
|
+
cacheprefix = "https://webcache.googleusercontent.com/search?q=cache%3A" # Use Google Cache, per #600
|
33
|
+
|
34
|
+
firstpage = utils.get_url(cacheprefix + quote("https://labor.hawaii.gov/wdc/real-time-warn-updates/"))
|
32
35
|
soup = BeautifulSoup(firstpage.text, features="html5lib")
|
33
36
|
pagesection = soup.select("div.primary-content")[0]
|
34
37
|
subpageurls = []
|
35
38
|
for atag in pagesection.find_all("a"):
|
36
39
|
href = atag["href"]
|
37
40
|
if href.endswith("/"):
|
38
|
-
href = href[:-1]
|
39
|
-
subpageurls.append(href)
|
41
|
+
href = href # [:-1]
|
42
|
+
subpageurls.append(cacheprefix + quote(href))
|
40
43
|
|
41
44
|
headers = ["Company", "Date", "PDF url", "location", "jobs"]
|
42
45
|
data = [headers]
|
@@ -85,8 +88,8 @@ def scrape(
|
|
85
88
|
row.append(dates[i])
|
86
89
|
|
87
90
|
row.append(url)
|
88
|
-
row.append(None)
|
89
|
-
row.append(None)
|
91
|
+
row.append(None) # location
|
92
|
+
row.append(None) # jobs
|
90
93
|
data.append(row)
|
91
94
|
|
92
95
|
output_csv = data_dir / "hi.csv"
|
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.1
|
2
2
|
Name: warn-scraper
|
3
|
-
Version: 1.2.56
|
3
|
+
Version: 1.2.57
|
4
4
|
Summary: Command-line interface for downloading WARN Act notices of qualified plant closings and mass layoffs from state government websites
|
5
5
|
Home-page: https://github.com/biglocalnews/warn-scraper
|
6
6
|
Author: Big Local News
|
@@ -36,7 +36,7 @@ warn/scrapers/dc.py,sha256=kAWmERzEIOtGHla9tn8hK0NfP4B-aP4jknNGUiPw4C4,4493
|
|
36
36
|
warn/scrapers/de.py,sha256=yMpCFAAlIO2f4DVUQpPKKLzm52_Zpn9IuPPFBNX1pjQ,1386
|
37
37
|
warn/scrapers/fl.py,sha256=mHymxjwNGtYXRxAwjUSIG1qeSp4Y_zzr4XDxPz9LAfY,9560
|
38
38
|
warn/scrapers/ga.py,sha256=EuqBrMlBojH6eXOHisNqJAQcsnb8FPHDwWomNopw9Ys,7285
|
39
|
-
warn/scrapers/hi.py,sha256=
|
39
|
+
warn/scrapers/hi.py,sha256=IrwgUMNPqsHExiLZ8dFM25am7KTtVRrLDjIesNtJIsk,3736
|
40
40
|
warn/scrapers/ia.py,sha256=zOncaA9M0d6paT4pB7UU_4D_yxUgeUiGRcnpKi9DsRA,1999
|
41
41
|
warn/scrapers/id.py,sha256=rRkE9g9ZOL7JgTdIz46kyGOeetzSla3e1Xr6gJ1v_74,5443
|
42
42
|
warn/scrapers/il.py,sha256=sygdvsNuB_Gvu3o_HidtpSP4FLz0szKb1zEHqGxVtlI,1563
|
@@ -66,9 +66,9 @@ warn/scrapers/va.py,sha256=DDuR4_2Jpaxg9nVmuM6PAR8v8xz3VgxTBG5sWJgz2q0,1582
|
|
66
66
|
warn/scrapers/vt.py,sha256=d-bo4WK2hkrk4BhCCmLpEovcoZltlvdIUB6O0uaMx5A,1186
|
67
67
|
warn/scrapers/wa.py,sha256=Il3RmJpKr7SbwUBxHxlhEFLoxy7zSiduyo8F2EddB2Y,4021
|
68
68
|
warn/scrapers/wi.py,sha256=ClEzXkwZbop0W4fkQgsb5oHAPUrb4luUPGV-jOKwkcg,4855
|
69
|
-
warn_scraper-1.2.56.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
|
70
|
-
warn_scraper-1.2.
|
71
|
-
warn_scraper-1.2.56.dist-info/WHEEL,sha256=oiQVh_5PnQM0E3gPdiz09WCNmwiHDMaGer_elqB3coM,92
|
72
|
-
warn_scraper-1.2.56.dist-info/entry_points.txt,sha256=poh_oSweObGlBSs1_2qZmnTodlOYD0KfO7-h7W2UQIw,47
|
73
|
-
warn_scraper-1.2.56.dist-info/top_level.txt,sha256=gOhHgNEkrUvajlzoKkVOo-TlQht9MoXnKOErjzqLGHo,11
|
74
|
-
warn_scraper-1.2.56.dist-info/RECORD,,
|
69
|
+
warn_scraper-1.2.57.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
|
70
|
+
warn_scraper-1.2.57.dist-info/METADATA,sha256=Z__kggmqalQaYaWaynUtNFXqNiBW9X8u7SDh0Y3qjkI,2025
|
71
|
+
warn_scraper-1.2.57.dist-info/WHEEL,sha256=oiQVh_5PnQM0E3gPdiz09WCNmwiHDMaGer_elqB3coM,92
|
72
|
+
warn_scraper-1.2.57.dist-info/entry_points.txt,sha256=poh_oSweObGlBSs1_2qZmnTodlOYD0KfO7-h7W2UQIw,47
|
73
|
+
warn_scraper-1.2.57.dist-info/top_level.txt,sha256=gOhHgNEkrUvajlzoKkVOo-TlQht9MoXnKOErjzqLGHo,11
|
74
|
+
warn_scraper-1.2.57.dist-info/RECORD,,
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|