warn-scraper 1.2.80__py3-none-any.whl → 1.2.82__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
warn/scrapers/md.py CHANGED
@@ -1,6 +1,7 @@
1
1
  import logging
2
2
  import re
3
3
  from pathlib import Path
4
+ from time import sleep
4
5
 
5
6
  from bs4 import BeautifulSoup
6
7
 
@@ -16,6 +17,8 @@ __source__ = {
16
17
 
17
18
  logger = logging.getLogger(__name__)
18
19
 
20
+ naptime = 3
21
+
19
22
 
20
23
  def scrape(
21
24
  data_dir: Path = utils.WARN_DATA_DIR,
@@ -42,6 +45,8 @@ def scrape(
42
45
  # Save it to the cache
43
46
  cache.write("md/source.html", html)
44
47
 
48
+ sleep(naptime) # Try to stop blocked connections by being less aggressive
49
+
45
50
  # Parse the list of links
46
51
  soup = BeautifulSoup(html, "html.parser")
47
52
  a_list = soup.find_all("a", {"class": "sub"})
@@ -61,6 +66,8 @@ def scrape(
61
66
  # Save it to the cache
62
67
  cache.write(f"md/{href}.html", html)
63
68
 
69
+ sleep(naptime) # Try to stop blocked connections by being less aggressive
70
+
64
71
  # Add it to the list
65
72
  html_list.append(html)
66
73
 
warn/scrapers/ri.py CHANGED
@@ -61,6 +61,7 @@ def scrape(
61
61
  dirty_list.extend(localrows)
62
62
 
63
63
  headers = dirty_list[1] # Skip false header at position 0
64
+ headers = [x for x in headers if x is not None]
64
65
  headers[2] = (
65
66
  headers[2]
66
67
  .replace("Company Name ", "Company Name")
@@ -85,9 +86,14 @@ def scrape(
85
86
  logger.debug(f"Got : {row}")
86
87
  else:
87
88
  line = {}
88
- for i, fieldname in enumerate(headers):
89
- line[fieldname] = row[i]
90
- row_list.append(line)
89
+ if len(headers) > len(row):
90
+ logger.debug(
91
+ f"{len(row)} items found, vs. expected {len(headers)}. Dropping row: {row}"
92
+ )
93
+ else:
94
+ for i, fieldname in enumerate(headers):
95
+ line[fieldname] = row[i]
96
+ row_list.append(line)
91
97
  # dirty_list = None
92
98
  logger.debug(
93
99
  f"Successfully merged {len(row_list)-1:,} records from new spreadsheet."
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: warn-scraper
3
- Version: 1.2.80
3
+ Version: 1.2.82
4
4
  Summary: Command-line interface for downloading WARN Act notices of qualified plant closings and mass layoffs from state government websites
5
5
  Home-page: https://github.com/biglocalnews/warn-scraper
6
6
  Author: Big Local News
@@ -43,7 +43,7 @@ warn/scrapers/in.py,sha256=dAT40ROhhKiwLcwa_YJ6EyhsYBLe0IX2rOWXmNa6JMs,2026
43
43
  warn/scrapers/ks.py,sha256=F_3biEMF7zgCX2XVuUACR74Vyzapta4SaM9SY3EuZCU,1266
44
44
  warn/scrapers/ky.py,sha256=XjIojMpaoKbypa7l23IybP02jBijBCJG5UGqfO-EYjg,4365
45
45
  warn/scrapers/la.py,sha256=60z-4LZY5xp6aX8r6HGGW3FaOVEGnxlG2Mfgpt4G2WE,12877
46
- warn/scrapers/md.py,sha256=PHvL81V0AwAhYhTTEmXBhbfZMBrBJipvUwXKSpizifA,2845
46
+ warn/scrapers/md.py,sha256=e-tiiKwr9dNEemtk7SWMv317Nv-qEDf5xPNcMU8AZDQ,3045
47
47
  warn/scrapers/me.py,sha256=q36F4yJ7hvZsLayA3uBS1romo4X3Qf-sEi2Y7LAQCi8,1172
48
48
  warn/scrapers/mi.py,sha256=9clZ9mATEJwdVLzDo_h66rK0aV5Zc7GGQ7AauutS6Wo,3591
49
49
  warn/scrapers/mo.py,sha256=wnnwQAiVPwuheMqptMXZpyQdiKNghhKwTO-Bnh9oXoU,3492
@@ -55,7 +55,7 @@ warn/scrapers/ny.py,sha256=kuBdgF1C_GnHEWlaLAas6zJG2Xhfj3c4q_-tuJLX2rk,3615
55
55
  warn/scrapers/oh.py,sha256=2MEB_0AT37dsAsrhdl_Y0LUNHu0xGy4B1F7aSMhuUu0,3151
56
56
  warn/scrapers/ok.py,sha256=qJE49VY6dMhbokFB9IAOL2XyuYSJpEKKxITPO9sUHS4,1197
57
57
  warn/scrapers/or.py,sha256=0PjyrW3CHdxtHhqEo3Ob-9B6YckACoBD3K0c4FPQUcg,5208
58
- warn/scrapers/ri.py,sha256=vBbXFP5ClvqlOc_srR8sHsA8lpi7eLuMYm7ydUY5Fxo,4163
58
+ warn/scrapers/ri.py,sha256=EUyLy59eNiYHqiJR8C0YcJrZtp09KyVc45AFD0_Uc0U,4497
59
59
  warn/scrapers/sc.py,sha256=p3kscSNSW9C8C5QaSUbCAo6XibgB7G2iH6zaMH7Mnsc,4819
60
60
  warn/scrapers/sd.py,sha256=_4R19Ybzsyx1PvcWV3_laJmJ3etrwVGfhNEQm6njwoA,1904
61
61
  warn/scrapers/tn.py,sha256=i1H7c09Ea3CDrTXqqRMLBMPT_34QtGA0-x7T8rm_j5Q,2945
@@ -65,9 +65,9 @@ warn/scrapers/va.py,sha256=13lhkQrSkPGHEiWUuf1qiS890PWYE5gV-TgISpoiQnc,1711
65
65
  warn/scrapers/vt.py,sha256=d-bo4WK2hkrk4BhCCmLpEovcoZltlvdIUB6O0uaMx5A,1186
66
66
  warn/scrapers/wa.py,sha256=UXdVtHZo_a-XfoiyOooTRfTb9W3PErSZdKca6SRORgs,4282
67
67
  warn/scrapers/wi.py,sha256=ClEzXkwZbop0W4fkQgsb5oHAPUrb4luUPGV-jOKwkcg,4855
68
- warn_scraper-1.2.80.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
69
- warn_scraper-1.2.80.dist-info/METADATA,sha256=R3hOSq76-i_I4IBWW1NX1cGg7SoJM8PNh2dBMefHqUs,2025
70
- warn_scraper-1.2.80.dist-info/WHEEL,sha256=Wyh-_nZ0DJYolHNn1_hMa4lM7uDedD_RGVwbmTjyItk,91
71
- warn_scraper-1.2.80.dist-info/entry_points.txt,sha256=poh_oSweObGlBSs1_2qZmnTodlOYD0KfO7-h7W2UQIw,47
72
- warn_scraper-1.2.80.dist-info/top_level.txt,sha256=gOhHgNEkrUvajlzoKkVOo-TlQht9MoXnKOErjzqLGHo,11
73
- warn_scraper-1.2.80.dist-info/RECORD,,
68
+ warn_scraper-1.2.82.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
69
+ warn_scraper-1.2.82.dist-info/METADATA,sha256=kxvv_1p1AQwjc40wHLkQddflASZc9I4_GxtcySjdNIo,2025
70
+ warn_scraper-1.2.82.dist-info/WHEEL,sha256=Wyh-_nZ0DJYolHNn1_hMa4lM7uDedD_RGVwbmTjyItk,91
71
+ warn_scraper-1.2.82.dist-info/entry_points.txt,sha256=poh_oSweObGlBSs1_2qZmnTodlOYD0KfO7-h7W2UQIw,47
72
+ warn_scraper-1.2.82.dist-info/top_level.txt,sha256=gOhHgNEkrUvajlzoKkVOo-TlQht9MoXnKOErjzqLGHo,11
73
+ warn_scraper-1.2.82.dist-info/RECORD,,