warn-scraper 1.2.73__py3-none-any.whl → 1.2.74__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- warn/scrapers/ca.py +19 -14
- {warn_scraper-1.2.73.dist-info → warn_scraper-1.2.74.dist-info}/METADATA +1 -1
- {warn_scraper-1.2.73.dist-info → warn_scraper-1.2.74.dist-info}/RECORD +7 -7
- {warn_scraper-1.2.73.dist-info → warn_scraper-1.2.74.dist-info}/LICENSE +0 -0
- {warn_scraper-1.2.73.dist-info → warn_scraper-1.2.74.dist-info}/WHEEL +0 -0
- {warn_scraper-1.2.73.dist-info → warn_scraper-1.2.74.dist-info}/entry_points.txt +0 -0
- {warn_scraper-1.2.73.dist-info → warn_scraper-1.2.74.dist-info}/top_level.txt +0 -0
warn/scrapers/ca.py
CHANGED
@@ -224,20 +224,25 @@ def _extract_pdf_data(pdf_path):
|
|
224
224
|
if "summary" in first_cell:
|
225
225
|
continue
|
226
226
|
for row in rows:
|
227
|
-
|
228
|
-
|
229
|
-
|
230
|
-
|
231
|
-
|
232
|
-
|
233
|
-
|
234
|
-
|
235
|
-
|
236
|
-
|
237
|
-
|
238
|
-
|
239
|
-
|
240
|
-
|
227
|
+
# Summary rows have an extra field, and the above code does not
|
228
|
+
# block the summary table from being parsed if it jumps onto another page.
|
229
|
+
if len(row) != len(raw_header) + 1:
|
230
|
+
data_row = {}
|
231
|
+
for i, value in enumerate(row):
|
232
|
+
this_raw_header = raw_header[i]
|
233
|
+
this_clean_header = header_crosswalk[this_raw_header]
|
234
|
+
data_row[this_clean_header] = value
|
235
|
+
# Data clean-ups
|
236
|
+
data_row.update(
|
237
|
+
{
|
238
|
+
"effective_date": data_row["effective_date"].replace(
|
239
|
+
" ", ""
|
240
|
+
),
|
241
|
+
"received_date": data_row["received_date"].replace(" ", ""),
|
242
|
+
"source_file": str(pdf_path).split("/")[-1],
|
243
|
+
}
|
244
|
+
)
|
245
|
+
data.append(data_row)
|
241
246
|
return data
|
242
247
|
|
243
248
|
|
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.1
|
2
2
|
Name: warn-scraper
|
3
|
-
Version: 1.2.73
|
3
|
+
Version: 1.2.74
|
4
4
|
Summary: Command-line interface for downloading WARN Act notices of qualified plant closings and mass layoffs from state government websites
|
5
5
|
Home-page: https://github.com/biglocalnews/warn-scraper
|
6
6
|
Author: Big Local News
|
@@ -28,7 +28,7 @@ warn/scrapers/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
28
28
|
warn/scrapers/ak.py,sha256=h7BYMTV0whwWAPhbzVDVKMMoVCFphKly70aiTHabPq4,1847
|
29
29
|
warn/scrapers/al.py,sha256=D0rT9GQ0vwfkRuveVAt-Po-T6b2TI1EPGeLOBy2m3_M,2240
|
30
30
|
warn/scrapers/az.py,sha256=elGbue01Gjf_DQ66Wy9qqGIOJsiY-KIKJOVeft8pCXg,1447
|
31
|
-
warn/scrapers/ca.py,sha256=
|
31
|
+
warn/scrapers/ca.py,sha256=rBTB-6LmNIlbGCqrCtI3O-w2e_0kcVSFxvjvh4EHBlk,8511
|
32
32
|
warn/scrapers/co.py,sha256=g076Zqe8XA8tbW03HP6-03mJV8fft1niHfa5Sy6me9A,7388
|
33
33
|
warn/scrapers/ct.py,sha256=HLMmBSFhT5Y3vZQUwRyCTxiG5BMQXTfG3SEj5rkQEL4,4771
|
34
34
|
warn/scrapers/dc.py,sha256=_sHLnVqK_W90QqJb_W88yDlgPjoMl63LYZP3CJfdN9g,4484
|
@@ -65,9 +65,9 @@ warn/scrapers/va.py,sha256=13lhkQrSkPGHEiWUuf1qiS890PWYE5gV-TgISpoiQnc,1711
|
|
65
65
|
warn/scrapers/vt.py,sha256=d-bo4WK2hkrk4BhCCmLpEovcoZltlvdIUB6O0uaMx5A,1186
|
66
66
|
warn/scrapers/wa.py,sha256=UXdVtHZo_a-XfoiyOooTRfTb9W3PErSZdKca6SRORgs,4282
|
67
67
|
warn/scrapers/wi.py,sha256=ClEzXkwZbop0W4fkQgsb5oHAPUrb4luUPGV-jOKwkcg,4855
|
68
|
-
warn_scraper-1.2.73.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
|
69
|
-
warn_scraper-1.2.73.dist-info/METADATA,sha256=
|
70
|
-
warn_scraper-1.2.73.dist-info/WHEEL,sha256=GJ7t_kWBFywbagK5eo9IoUwLW6oyOeTKmQ-9iHFVNxQ,92
|
71
|
-
warn_scraper-1.2.73.dist-info/entry_points.txt,sha256=poh_oSweObGlBSs1_2qZmnTodlOYD0KfO7-h7W2UQIw,47
|
72
|
-
warn_scraper-1.2.73.dist-info/top_level.txt,sha256=gOhHgNEkrUvajlzoKkVOo-TlQht9MoXnKOErjzqLGHo,11
|
73
|
-
warn_scraper-1.2.73.dist-info/RECORD,,
|
68
|
+
warn_scraper-1.2.74.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
|
69
|
+
warn_scraper-1.2.74.dist-info/METADATA,sha256=qUTAC44XVu9ARQFObvm3h9aJe0dAduopZX4-bP18i3k,2025
|
70
|
+
warn_scraper-1.2.74.dist-info/WHEEL,sha256=GJ7t_kWBFywbagK5eo9IoUwLW6oyOeTKmQ-9iHFVNxQ,92
|
71
|
+
warn_scraper-1.2.74.dist-info/entry_points.txt,sha256=poh_oSweObGlBSs1_2qZmnTodlOYD0KfO7-h7W2UQIw,47
|
72
|
+
warn_scraper-1.2.74.dist-info/top_level.txt,sha256=gOhHgNEkrUvajlzoKkVOo-TlQht9MoXnKOErjzqLGHo,11
|
73
|
+
warn_scraper-1.2.74.dist-info/RECORD,,
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|