warn-scraper 1.2.101__py3-none-any.whl → 1.2.103__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
warn/scrapers/co.py CHANGED
@@ -105,6 +105,7 @@ def scrape(
105
105
 
106
106
  # Clean up the headers
107
107
  header_crosswalk = {
108
+ "Name": "company",
108
109
  "Company Name": "company",
109
110
  "Company": "company",
110
111
  "WARN Date": "notice_date",
warn/scrapers/ct.py CHANGED
@@ -70,7 +70,9 @@ def scrape(
70
70
 
71
71
  # Parse out the table
72
72
  soup = BeautifulSoup(html, "html.parser")
73
- if year == 2016:
73
+ if year >= 2025:
74
+ table = soup.find_all("table", "style30")
75
+ elif year == 2016:
74
76
  table = soup.find_all("table", "style15")
75
77
  else:
76
78
  table = soup.find_all("table", "MsoNormalTable")
warn/scrapers/ny.py CHANGED
@@ -1,14 +1,15 @@
1
1
  import logging
2
2
  from pathlib import Path
3
3
 
4
- from bs4 import BeautifulSoup
5
- from openpyxl import load_workbook
6
-
7
4
  from .. import utils
8
5
  from ..cache import Cache
9
6
 
10
- __authors__ = ["zstumgoren", "Dilcia19", "ydoc5212", "palewire"]
11
- __tags__ = ["historical", "excel"]
7
+ # from bs4 import BeautifulSoup
8
+ # from openpyxl import load_workbook
9
+
10
+
11
+ __authors__ = ["zstumgoren", "Dilcia19", "ydoc5212", "palewire", "stucka"]
12
+ __tags__ = ["historical", "excel", "html"]
12
13
  __source__ = {
13
14
  "name": "New York Department of Labor",
14
15
  "url": "https://dol.ny.gov/warn-notices",
@@ -32,99 +33,33 @@ def scrape(
32
33
  """
33
34
  cache = Cache(cache_dir)
34
35
 
35
- # Get the latest HTML page
36
- url_list = [
37
- dict(year=2023, url="https://dol.ny.gov/warn-notices"),
38
- dict(year=2022, url="https://dol.ny.gov/2022-warn-notices"),
39
- dict(year=2021, url="https://dol.ny.gov/warn-notices-2021"),
40
- ]
36
+ """
37
+ In 2025 New York shifted from a collection of Excel and HTML to something in Tableau. Tableau notes:
38
+ Find a new landing page for a data page, now done in Tableau: https://dol.ny.gov/warn-dashboard
39
+ Scroll down and there's a "View in Tableau Public" I don't remember clicking
40
+ Opens in new tab at https://public.tableau.com/app/profile/kylee.teague2482/viz/WorkerAdjustmentRetrainingNotificationWARN/WARN
41
+ Append .csv to the end of that URL:
42
+ https://public.tableau.com/app/profile/kylee.teague2482/viz/WorkerAdjustmentRetrainingNotificationWARN/WARN.csv
43
+ Try it in requests: no good. Try it in the browser again; the file downloads. Find it in the downloads section of the browser, right-click, copy the download link, try that in requests and ... it worked?
44
+ """
41
45
 
42
- # Loop through the urls and get the stuff
43
- html_row_list = []
44
- for config in url_list:
45
- html_row_list += _get_html_data(cache, config)
46
+ url = "https://public.tableau.com/views/WorkerAdjustmentRetrainingNotificationWARN/WARN.csv?%3Adisplay_static_image=y&%3AbootstrapWhenNotified=true&%3Aembed=true&%3Alanguage=en-US&:embed=y&:showVizHome=n&:apiID=host0#navType=0&navSrc=Parse"
46
47
 
47
- # Get the historical static data file
48
- excel_row_list = _get_historical_data(cache)
48
+ csv_file = "ny/tableau.csv"
49
+
50
+ cache.download(csv_file, url)
51
+
52
+ mydata = cache.read_csv(csv_file)
49
53
 
50
54
  # Set the export path
51
55
  data_path = data_dir / "ny.csv"
52
56
 
53
57
  # Combine and write out the file
54
- fieldnames = list(html_row_list[0].keys()) + list(excel_row_list[0].keys())
55
- row_list = html_row_list + excel_row_list
56
- utils.write_dict_rows_to_csv(
57
- data_path,
58
- fieldnames,
59
- row_list,
60
- extrasaction="ignore",
61
- )
58
+ utils.write_rows_to_csv(data_path, mydata)
62
59
 
63
60
  # Return the path to the file
64
61
  return data_path
65
62
 
66
63
 
67
- def _get_html_data(cache, config):
68
- r = utils.get_url(config["url"])
69
- html = r.text
70
-
71
- # Save it to the cache
72
- cache.write(f"ny/{config['year']}.html", html)
73
-
74
- # Parse the HTML and grab our table
75
- soup = BeautifulSoup(html, "html.parser")
76
- table = soup.find("div", class_="landing-paragraphs").find("table")
77
-
78
- row_list = []
79
- # Loop through the rows of the table
80
- for tr in table.find_all("tr")[1:]:
81
- td_list = tr.find_all("td")
82
- d = dict(
83
- company_name=td_list[0].a.text,
84
- notice_url=td_list[0].a["href"],
85
- date_posted=td_list[1].text,
86
- notice_dated=td_list[2].text,
87
- )
88
- row_list.append(d)
89
- return row_list
90
-
91
-
92
- def _get_historical_data(cache):
93
- # Request the page and save it to the cache
94
- url = (
95
- "https://storage.googleapis.com/bln-data-public/warn-layoffs/ny_historical.xlsx"
96
- )
97
-
98
- excel_path = cache.download("ny/source.xlsx", url)
99
-
100
- # Open it up
101
- workbook = load_workbook(filename=excel_path)
102
-
103
- # Get the first sheet
104
- worksheet = workbook.worksheets[0]
105
-
106
- # Convert the sheet to a list of lists
107
- row_list = []
108
- for r in worksheet.rows:
109
- column = [cell.value for cell in r]
110
- row_list.append(column)
111
-
112
- # Transform this into a list of dictionaries with headers as keys
113
- header_list = row_list.pop(0)
114
- dict_list = []
115
- for row in row_list:
116
- d = {}
117
- for i, cell in enumerate(row):
118
- key = header_list[i]
119
- # Skip any columns where the header is null
120
- if key is None:
121
- continue
122
- d[key] = cell
123
- dict_list.append(d)
124
-
125
- # Return the list of dicts
126
- return dict_list
127
-
128
-
129
64
  if __name__ == "__main__":
130
65
  scrape()
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: warn-scraper
3
- Version: 1.2.101
3
+ Version: 1.2.103
4
4
  Summary: Command-line interface for downloading WARN Act notices of qualified plant closings and mass layoffs from state government websites
5
5
  Home-page: https://github.com/biglocalnews/warn-scraper
6
6
  Author: Big Local News
@@ -29,8 +29,8 @@ warn/scrapers/ak.py,sha256=h7BYMTV0whwWAPhbzVDVKMMoVCFphKly70aiTHabPq4,1847
29
29
  warn/scrapers/al.py,sha256=D0rT9GQ0vwfkRuveVAt-Po-T6b2TI1EPGeLOBy2m3_M,2240
30
30
  warn/scrapers/az.py,sha256=elGbue01Gjf_DQ66Wy9qqGIOJsiY-KIKJOVeft8pCXg,1447
31
31
  warn/scrapers/ca.py,sha256=VQOfjHXPCc-jYwh-EPGVVfnzvXB7pdmCt2uJ6QnMPRM,8600
32
- warn/scrapers/co.py,sha256=Zpz96Ftqti3Rx-vJEgSUAbvYmbHJ-w0tnt7mQ4FL6CA,7851
33
- warn/scrapers/ct.py,sha256=HLMmBSFhT5Y3vZQUwRyCTxiG5BMQXTfG3SEj5rkQEL4,4771
32
+ warn/scrapers/co.py,sha256=hUfqrzlhXQBkP4vxewVRrMZrgInoLer5S2MZlyYIQE4,7878
33
+ warn/scrapers/ct.py,sha256=PKeZtlB0-z2wCmYmGl_WYoVo2gzwKV36upZcJVaJxjM,4852
34
34
  warn/scrapers/dc.py,sha256=_sHLnVqK_W90QqJb_W88yDlgPjoMl63LYZP3CJfdN9g,4484
35
35
  warn/scrapers/de.py,sha256=GyM92A-lFwZAfRxgbO-sIWhRfmBEKirzchaPIv-u0o4,1364
36
36
  warn/scrapers/fl.py,sha256=YJ6Qt-jJZ7_iUKlHDaQuaV2gRmae8AJKS5dwwChadBE,9563
@@ -51,7 +51,7 @@ warn/scrapers/mt.py,sha256=t2MP4OCcuCEnrnvNgOu289P0eekZq4XaCK65qzgZX88,2457
51
51
  warn/scrapers/ne.py,sha256=JawuGJ3tCKvMd-N-p03gnltB4rol4QUJshMk2oyMPO4,4143
52
52
  warn/scrapers/nj.py,sha256=nwbMbeQuUJbYRVoyUyKZBmNqvqsXu3Habt-10r8DvZE,2230
53
53
  warn/scrapers/nm.py,sha256=HZpfLzn0LvLeRztYvqJ9n6FR5PYpyMndo8tzI8h9S2o,3581
54
- warn/scrapers/ny.py,sha256=kuBdgF1C_GnHEWlaLAas6zJG2Xhfj3c4q_-tuJLX2rk,3615
54
+ warn/scrapers/ny.py,sha256=hXbxPhiK-Eyc9h_05wkAsfdVIT0vayKX4EE5aiJVdBc,2291
55
55
  warn/scrapers/oh.py,sha256=2MEB_0AT37dsAsrhdl_Y0LUNHu0xGy4B1F7aSMhuUu0,3151
56
56
  warn/scrapers/ok.py,sha256=qJE49VY6dMhbokFB9IAOL2XyuYSJpEKKxITPO9sUHS4,1197
57
57
  warn/scrapers/or.py,sha256=0PjyrW3CHdxtHhqEo3Ob-9B6YckACoBD3K0c4FPQUcg,5208
@@ -65,9 +65,9 @@ warn/scrapers/va.py,sha256=hOPuiAjnTmtXCOdnBM_jAJuz9_u6oCxtbm2F-9m3ot0,10732
65
65
  warn/scrapers/vt.py,sha256=d-bo4WK2hkrk4BhCCmLpEovcoZltlvdIUB6O0uaMx5A,1186
66
66
  warn/scrapers/wa.py,sha256=UXdVtHZo_a-XfoiyOooTRfTb9W3PErSZdKca6SRORgs,4282
67
67
  warn/scrapers/wi.py,sha256=ClEzXkwZbop0W4fkQgsb5oHAPUrb4luUPGV-jOKwkcg,4855
68
- warn_scraper-1.2.101.dist-info/licenses/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
69
- warn_scraper-1.2.101.dist-info/METADATA,sha256=sgTsL26CgS1htJ6S-r0m62ljnet7Ilg4-zWjBSiJmdA,2385
70
- warn_scraper-1.2.101.dist-info/WHEEL,sha256=SmOxYU7pzNKBqASvQJ7DjX3XGUF92lrGhMb3R6_iiqI,91
71
- warn_scraper-1.2.101.dist-info/entry_points.txt,sha256=poh_oSweObGlBSs1_2qZmnTodlOYD0KfO7-h7W2UQIw,47
72
- warn_scraper-1.2.101.dist-info/top_level.txt,sha256=gOhHgNEkrUvajlzoKkVOo-TlQht9MoXnKOErjzqLGHo,11
73
- warn_scraper-1.2.101.dist-info/RECORD,,
68
+ warn_scraper-1.2.103.dist-info/licenses/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
69
+ warn_scraper-1.2.103.dist-info/METADATA,sha256=sszMf7V4gL8OsUkMeNzwFqW8Ii-wRXK_D1dZ9zhazBk,2385
70
+ warn_scraper-1.2.103.dist-info/WHEEL,sha256=SmOxYU7pzNKBqASvQJ7DjX3XGUF92lrGhMb3R6_iiqI,91
71
+ warn_scraper-1.2.103.dist-info/entry_points.txt,sha256=poh_oSweObGlBSs1_2qZmnTodlOYD0KfO7-h7W2UQIw,47
72
+ warn_scraper-1.2.103.dist-info/top_level.txt,sha256=gOhHgNEkrUvajlzoKkVOo-TlQht9MoXnKOErjzqLGHo,11
73
+ warn_scraper-1.2.103.dist-info/RECORD,,