warn-scraper 1.2.73__py3-none-any.whl → 1.2.74__py3-none-any.whl

This diff shows the changes between two publicly available versions of a package that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.
warn/scrapers/ca.py CHANGED
@@ -224,20 +224,25 @@ def _extract_pdf_data(pdf_path):
224
224
  if "summary" in first_cell:
225
225
  continue
226
226
  for row in rows:
227
- data_row = {}
228
- for i, value in enumerate(row):
229
- this_raw_header = raw_header[i]
230
- this_clean_header = header_crosswalk[this_raw_header]
231
- data_row[this_clean_header] = value
232
- # Data clean-ups
233
- data_row.update(
234
- {
235
- "effective_date": data_row["effective_date"].replace(" ", ""),
236
- "received_date": data_row["received_date"].replace(" ", ""),
237
- "source_file": str(pdf_path).split("/")[-1],
238
- }
239
- )
240
- data.append(data_row)
227
+ # Summary rows have an extra field, and the above code does not
228
+ # block the summary table from being parsed if it jumps onto another page.
229
+ if len(row) != len(raw_header) + 1:
230
+ data_row = {}
231
+ for i, value in enumerate(row):
232
+ this_raw_header = raw_header[i]
233
+ this_clean_header = header_crosswalk[this_raw_header]
234
+ data_row[this_clean_header] = value
235
+ # Data clean-ups
236
+ data_row.update(
237
+ {
238
+ "effective_date": data_row["effective_date"].replace(
239
+ " ", ""
240
+ ),
241
+ "received_date": data_row["received_date"].replace(" ", ""),
242
+ "source_file": str(pdf_path).split("/")[-1],
243
+ }
244
+ )
245
+ data.append(data_row)
241
246
  return data
242
247
 
243
248
 
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: warn-scraper
3
- Version: 1.2.73
3
+ Version: 1.2.74
4
4
  Summary: Command-line interface for downloading WARN Act notices of qualified plant closings and mass layoffs from state government websites
5
5
  Home-page: https://github.com/biglocalnews/warn-scraper
6
6
  Author: Big Local News
@@ -28,7 +28,7 @@ warn/scrapers/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
28
28
  warn/scrapers/ak.py,sha256=h7BYMTV0whwWAPhbzVDVKMMoVCFphKly70aiTHabPq4,1847
29
29
  warn/scrapers/al.py,sha256=D0rT9GQ0vwfkRuveVAt-Po-T6b2TI1EPGeLOBy2m3_M,2240
30
30
  warn/scrapers/az.py,sha256=elGbue01Gjf_DQ66Wy9qqGIOJsiY-KIKJOVeft8pCXg,1447
31
- warn/scrapers/ca.py,sha256=_LvkIci1nTUKBt5KC-wEcazWG7zoeUeadxj4D0XD97k,8170
31
+ warn/scrapers/ca.py,sha256=rBTB-6LmNIlbGCqrCtI3O-w2e_0kcVSFxvjvh4EHBlk,8511
32
32
  warn/scrapers/co.py,sha256=g076Zqe8XA8tbW03HP6-03mJV8fft1niHfa5Sy6me9A,7388
33
33
  warn/scrapers/ct.py,sha256=HLMmBSFhT5Y3vZQUwRyCTxiG5BMQXTfG3SEj5rkQEL4,4771
34
34
  warn/scrapers/dc.py,sha256=_sHLnVqK_W90QqJb_W88yDlgPjoMl63LYZP3CJfdN9g,4484
@@ -65,9 +65,9 @@ warn/scrapers/va.py,sha256=13lhkQrSkPGHEiWUuf1qiS890PWYE5gV-TgISpoiQnc,1711
65
65
  warn/scrapers/vt.py,sha256=d-bo4WK2hkrk4BhCCmLpEovcoZltlvdIUB6O0uaMx5A,1186
66
66
  warn/scrapers/wa.py,sha256=UXdVtHZo_a-XfoiyOooTRfTb9W3PErSZdKca6SRORgs,4282
67
67
  warn/scrapers/wi.py,sha256=ClEzXkwZbop0W4fkQgsb5oHAPUrb4luUPGV-jOKwkcg,4855
68
- warn_scraper-1.2.73.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
69
- warn_scraper-1.2.73.dist-info/METADATA,sha256=5FQltUNKR1LZmPu4Yqz8aqogBoQNNhSyISPC0SQ1sdg,2025
70
- warn_scraper-1.2.73.dist-info/WHEEL,sha256=GJ7t_kWBFywbagK5eo9IoUwLW6oyOeTKmQ-9iHFVNxQ,92
71
- warn_scraper-1.2.73.dist-info/entry_points.txt,sha256=poh_oSweObGlBSs1_2qZmnTodlOYD0KfO7-h7W2UQIw,47
72
- warn_scraper-1.2.73.dist-info/top_level.txt,sha256=gOhHgNEkrUvajlzoKkVOo-TlQht9MoXnKOErjzqLGHo,11
73
- warn_scraper-1.2.73.dist-info/RECORD,,
68
+ warn_scraper-1.2.74.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
69
+ warn_scraper-1.2.74.dist-info/METADATA,sha256=qUTAC44XVu9ARQFObvm3h9aJe0dAduopZX4-bP18i3k,2025
70
+ warn_scraper-1.2.74.dist-info/WHEEL,sha256=GJ7t_kWBFywbagK5eo9IoUwLW6oyOeTKmQ-9iHFVNxQ,92
71
+ warn_scraper-1.2.74.dist-info/entry_points.txt,sha256=poh_oSweObGlBSs1_2qZmnTodlOYD0KfO7-h7W2UQIw,47
72
+ warn_scraper-1.2.74.dist-info/top_level.txt,sha256=gOhHgNEkrUvajlzoKkVOo-TlQht9MoXnKOErjzqLGHo,11
73
+ warn_scraper-1.2.74.dist-info/RECORD,,