warn-scraper 1.2.152.dev0__py3-none-any.whl → 1.2.154.dev0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
File without changes
@@ -0,0 +1,361 @@
1
+ import json
2
+ import logging
3
+ import re
4
+
5
+ import camelot # pip install camelot-py==1.0.9 for now
6
+
7
+ logger = logging.getLogger(__name__)
8
+
9
+
10
def clean_cell(text: str) -> str:
    """
    Normalize the whitespace in the text of a single PDF cell.

    Keyword arguments:
    text -- the cell text to tidy; None is treated as an empty cell

    Returns: the text with every run of whitespace collapsed to one space
    and no leading or trailing whitespace
    """
    # A missing cell becomes an empty string
    if text is None:
        return ""

    # Collapse every whitespace run (newlines and tabs included), then trim
    whitespace_run = re.compile(r"\s+")
    collapsed = whitespace_run.sub(" ", text)
    return collapsed.strip()
27
+
28
+
29
def clean_row(row: list):
    """Tidy every cell in a row of PDF text.

    args:
        row (list): list of strings (None entries are allowed)
    returns:
        line (list): new list with clean_cell applied to each entry, in order
    """
    return [clean_cell(cell) for cell in row]
41
+
42
+
43
def is_empty(row: list) -> bool:
    """
    Tell whether none of the cells in a row are populated.

    Keyword arguments:
    row -- the row to check

    Returns: True if every cell is falsy (or the row has no cells), False otherwise
    """
    return not any(row)
53
+
54
+
55
def is_mostly_empty(row: list) -> bool:
    """
    Tell whether a row has at most two populated cells.

    Such rows are treated as fragments, e.g. carried over from a previous page.

    Keyword arguments:
    row -- the row to check

    Returns: True if two or fewer cells are truthy, False otherwise
    """
    populated = sum(1 for cell in row if cell)
    return populated <= 2
65
+
66
+
67
def has_content(value):
    """Check if a particular value has any content, e.g. is it a null or an empty string.

    Args:
        value: Any value pulled from a data row.
    Returns:
        False for None and for values whose string form is empty or only
        whitespace. True for everything else; lists and dicts always count
        as content, even when they are empty.
    """
    if value is None:
        return False
    # isinstance, not "value is list": the identity test compares against the
    # type object itself and never matches an actual list or dict.
    if isinstance(value, (list, dict)):
        return True
    return len(str(value).strip()) > 0


def count_data_items(row: dict, prefixes=None) -> int:
    """
    Count number of non-blank non-null data items in a row that aren't an internal variable.

    Args:
        row (dict): The row to check, keyed by field name
        prefixes (list) optional: Field-name prefixes marking internal fields to skip.
            If None (or not provided), skips fields beginning with ["int_", "_int_"].
            Pass an empty list to count every field.
    Returns:
        Integer of how many non-blank non-internal data items there are
    """
    # Only None means "use the defaults"; an explicit empty list must stay empty.
    if prefixes is None:
        prefixes = ["int_", "_int_"]
    # str.startswith accepts a tuple; an empty tuple never matches.
    skipped = tuple(prefixes)
    return sum(
        1
        for field, value in row.items()
        if not field.startswith(skipped) and has_content(value)
    )


def drop_thin_rows(rows: list, cutnumber: int, prefixes=None):
    """
    Drop rows with an improperly low count of valid entries, after filtering out prefixed fields of safe data.

    Args:
        rows: List of dicts
        cutnumber: Cut rows with X or fewer full items. x + 1, then, would be the minimum count of good.
        prefixes: list, optional. Field-name prefixes that are not counted.
            If None (or not provided), neglects fields beginning with ["int_", "_int_"].
            Pass an empty list to count every field.
    Returns:
        lines: List of dicts holding only the rows with more than cutnumber counted items
    """
    # Only None means "use the defaults"; an explicit empty list must stay empty.
    if prefixes is None:
        prefixes = ["int_", "_int_"]
    return [
        row for row in rows if count_data_items(row, prefixes=prefixes) > cutnumber
    ]
126
+
127
+
128
# Design notes for the PDF table parser, kept as a module-level string.
# The variable is not referenced anywhere else in the visible code.
internal_documentation_such_as_it_is = """
OK, this is going to be messy. The higher-level overview:
We get lists of strings from the PDF, an ostensible PDF row.

Some of these lists are going to be headers. The headers, of course, need to be detected initially.

And sometimes the headers show up as their own table, with nothing else.
If this is the case, they need to be applied as the headers to subsequent tables.

But headers can also repeat across pages, so we need to detect them.

To add to the fun, each of these rows from the PDF may be just part of another logical row,
from when cells are divided horizontally to hold multiple data points.

We need to detect those fragmentary lines, mostly by checking to see if most cells are empty.

If they're a fragment of a header, we need to track it somehow and build a structure to hold the fragment.
And remember header fragments may occur on multiple pages with multipage headers.
That means we need to build an initial structure to hold the headers, then skip some rows if we see the header again.

For non-header fragments, we need to append the data to the previous line in an appropriate data structure.

But wait! There's more!

PDF data tends to be really dirty, lots of junky white space.

Some people will use multiline data to show multiple data points in a single cell, such as Company name<newline>, City, State ZIP.
If we strip off white space, we're losing a way to segregate and process that data later. So we can't clean it up until later.
Unless it's for fragmentary rows, because we need to know that they're fragmentary and white space will wreck the count.

And of course lots of rows are entirely white space, just blank data rows left in a PDF. Those we just drop.

To sum up:
Just about every PDF row can be
An orphaned header, alone in the table
A full header row
A fragmentary header
A full data row
A fragmentary data row
A blank row

We need many little trackers to go through here and figure out what we're looking at.

We need code to clean up whitespace in cells and rows.

We need a function to delete rows with fewer than a certain number of data points (e.g., contents of a summary table).

We need a function that allows us to standardize header names.

We probably want code that tells us what PDF this is pulled from, on which row.
"""
179
+
180
+
181
def _patch_headers(rawheader: list, field_fixes: dict) -> list:
    """Clean each raw header cell and swap in its standardized name when one is known."""
    knownheaders = " ".join(sorted(field_fixes))
    patchedheaders = []
    for item in clean_row(rawheader):
        if item in field_fixes:
            patchedheaders.append(field_fixes[item])
        else:
            logger.debug(f"New header type found: {item}, not in {knownheaders}")
            patchedheaders.append(item)
    return patchedheaders


def parse_pdf(pdffile: str, field_fixes: dict | None = None):
    """Parse a PDF file to extract data from tables.

    Every row of every table is classified as an orphaned header (a one-row
    table whose header is applied to the next table), an initial or repeated
    header, a blank row, a mostly-empty fragment, or a regular data row.
    Fragments seen after data are merged into the previous data row.

    Args:
        pdffile (str): Path of the PDF to read
        field_fixes (dict, optional): Maps cleaned header text (and supplemental
            field names) to the target field name. Unknown names pass through unchanged.

    Returns:
        filelist: A list of dictionaries of data rows keyed to headers, each with
            extra "_int_"-prefixed bookkeeping fields
        filerowholder: Debugging data showing how row types were determined
    """
    if not field_fixes:
        logger.debug(
            "No 'field_fixes' variable submitted to pdfrodent.parse_pdf function."
        )
        field_fixes = {}
    else:
        logger.debug(f"{len(field_fixes):,} field_fixes to be used to clean headers.")
    filelist = []
    filerowholder = []
    logger.debug(f"Opening {pdffile} for PDF parsing")
    tables = camelot.read_pdf(pdffile, pages="all")
    # Bare file name whichever path separator is used; str() so a Path works too.
    pdfname = str(pdffile).split("/")[-1].split("\\")[-1]
    orphanedheader = False
    orphanholder = None
    for tableindex, table in enumerate(tables):
        locallist: list = []
        logger.debug(f"Processing table {tableindex} of {pdffile}")
        filerowholder.append(f"Processing table {tableindex} of {pdffile}")
        rawheader = None
        headerfirst = []
        headersupplement: dict = {}
        isheader = True
        seendata = False
        logger.debug(
            f"Processing table {tableindex} with {len(table.rows)} rows of {pdffile}"
        )

        # If the table has only one row, it's a stray header and should be used with the next table.
        if len(table.rows) == 1:
            logger.debug("\tOrphaned header detected!")
            filerowholder.append("\tOrphaned header detected!")
            orphanedheader = True
            rawheader = table.data[0]
            orphanholder = {
                "rawheader": rawheader,
                "patchedheaders": _patch_headers(rawheader, field_fixes),
            }
            logger.debug(f"{orphanholder}")
            filerowholder.append(f"{orphanholder}")
        # If there are multiple rows, there are a bunch of possibilities we need to poke ...
        else:
            # If we have a header from a one-row table, prepare to use the orphaned header
            if orphanedheader:
                isheader = True
                rawheader = orphanholder["rawheader"]  # type: ignore
                headerfirst = orphanholder["patchedheaders"]  # type: ignore

            for rowindex, row in enumerate(table.data):
                filerowholder.append(row)
                line: dict = {}  # rows in, lines out
                # If it's the first row in a table and we don't have an orphaned header,
                # it's an index row
                if rowindex == 0 and not orphanedheader:
                    rawheader = row
                    headerfirst = _patch_headers(rawheader, field_fixes)
                    isheader = True
                    filerowholder.append("\tIndex row!")

                elif row == rawheader:  # Later instance of a page header
                    isheader = True
                    filerowholder.append("\tRepeated header")

                # Drop blank rows entirely
                elif is_empty(clean_row(row)):
                    filerowholder.append("\tEmpty row")

                # Handle fragmentary records
                elif is_mostly_empty(clean_row(row)):
                    filerowholder.append("\tMostly empty row!")
                    if not seendata:  # Is this part of the initial header?
                        filerowholder.append("\tMostly empty row, haven't seen data")
                        for cellindex, cell in enumerate(row):
                            cleancell = clean_cell(cell)
                            if len(cleancell) > 0:  # If we have good data
                                # Only a placeholder is recorded here; the data-row
                                # lookup below is keyed by integer column index, so
                                # this entry never names a later field.
                                fieldname = f"supplement{cellindex}"
                                headersupplement[fieldname] = None  # type: ignore
                        isheader = False
                        orphanedheader = False

                    else:  # seenheader
                        if isheader:  # Supplement to a header on a latter page
                            filerowholder.append(
                                "\tMostly empty row, seems to be appending to a header"
                            )
                            for cellindex, cell in enumerate(row):
                                cleancell = clean_cell(cell)
                                if len(cleancell) > 0:  # If we have good data
                                    if cellindex not in headersupplement:
                                        # Store the header text by column index. The
                                        # original stored the dict inside itself, which
                                        # later produced an unhashable field name.
                                        headersupplement[cellindex] = cleancell
                                        logger.debug(
                                            f"Added {cleancell} to headersupplement, which now holds: {headersupplement}"
                                        )
                            isheader = False

                        else:  # Not a header, have seenheader; must be a regular row supplement
                            orphanedheader = False
                            isheader = False
                            filerowholder.append(
                                "\tMostly empty row, seems to be detailed info for a regular row"
                            )
                            for cellindex, cell in enumerate(row):
                                cleancell = clean_cell(cell)
                                if len(cleancell) > 0:  # If we have good data
                                    if cellindex in headersupplement:
                                        fieldname = headersupplement[cellindex]  # type: ignore
                                    else:
                                        fieldname = f"supplement_{cellindex}"
                                        logger.warning(
                                            f"Found {fieldname} as {cleancell} but not located in supplemental headers: {headersupplement}"
                                        )
                                    if fieldname in field_fixes:
                                        logger.debug(
                                            f"Shifting cell with {fieldname} to {field_fixes[fieldname]}"
                                        )
                                        fieldname = field_fixes[fieldname]
                                    # seendata is only True once a regular row has been
                                    # appended for this table, so locallist is non-empty.
                                    locallist[-1][
                                        fieldname
                                    ] = cleancell  # Add it to the previous line

                else:
                    # It's not an orphaned header
                    # It's not the initial header
                    # It's not a supplemental header
                    # It's not an empty row
                    # It's not a supplemental data row
                    # We ... actually have a regular data row here.
                    orphanedheader = False
                    filerowholder.append("\tSeems to be a regular row.")
                    isheader = False
                    seendata = True
                    for cellindex, cell in enumerate(row):
                        line[headerfirst[cellindex]] = clean_cell(cell)
                    filerowholder.append(f"\t\t{line}")
                    locallist.append(line)

        report = table.parsing_report

        # Stamp each extracted line with bookkeeping fields. The dicts are
        # mutated in place, so nothing needs to be written back to locallist.
        for record in locallist:
            record["_int_accuracy"] = report["accuracy"]
            record["_int_pdf_filename"] = pdfname
            record["_int_page"] = report["page"]
            record["_int_table_number"] = report["order"]
            record["_int_raw_fields"] = json.dumps(list(record.values()))
            record["_int_data_items"] = count_data_items(record)  # type: ignore
            if "Event Number" in record:
                record["Event Number"] = record["Event Number"].replace("\n", "")

        filelist.extend(locallist)
    return (filelist, filerowholder)
warn/scrapers/ms.py ADDED
@@ -0,0 +1,150 @@
1
+ import json
2
+ import logging
3
+ from pathlib import Path
4
+
5
+ from pyquery import PyQuery as pq
6
+
7
+ from warn.pdfrodent import pdfrodent as pdfrodent
8
+
9
+ from .. import utils
10
+ from ..cache import Cache
11
+
12
# Scraper metadata. scrape() reads __source__["url"] as the index page to fetch.
__authors__ = ["Ash1R", "stucka"]
__tags__ = ["pdf"]
__source__ = {
    "name": "Mississippi Department of Employment Security",
    "url": "https://mdes.ms.gov/information-center/warn-information/",
}

logger = logging.getLogger(__name__)
# When true, scrape() also writes ms/debugging.txt under the cache directory.
want_debugging_file = True
21
+
22
+
23
def scrape(
    data_dir: Path = utils.WARN_DATA_DIR,
    cache_dir: Path = utils.WARN_CACHE_DIR,
) -> Path:
    """
    Scrape data from Mississippi.

    Downloads the PDFs linked from the index page, parses their tables with
    pdfrodent, drops thin rows and writes the rest to ms.csv.

    Keyword arguments:
    data_dir -- the Path where the result will be saved (default WARN_DATA_DIR)
    cache_dir -- the Path where results can be cached (default WARN_CACHE_DIR)

    Returns: the Path where the file is written
    """
    cache = Cache(cache_dir)
    indexurl = __source__["url"]
    urlprefix = indexurl.split(".gov")[0] + ".gov"

    html = utils.get_url(indexurl).text
    cache.write("ms/index.html", html)

    content = pq(html)("div#page_content")
    anchors = pq(content)("a")

    # Parse HTML to identify relevant PDFs
    urlswanted = []
    for anchor in anchors:
        href = pq(anchor).attr("href")
        if not href:
            # Anchors without an href (e.g. named anchors) can't point at a PDF
            continue
        linkurl = href if "http" in href else urlprefix + href
        if linkurl.endswith(".pdf") and not linkurl.endswith("map.pdf"):
            urlswanted.append(linkurl)

    # Get the files. The five first-listed files, we want fresh.
    # That should cover every quarter in the latest year, and one quarter of the previous year, at least.
    for i, urlwanted in enumerate(urlswanted):
        basefilename = urlwanted.split("/")[-1]
        localfilename = cache_dir / f"ms/{basefilename}"
        if i <= 4:  # Get the five newest files to ensure proper overlap
            logger.debug(f"Fetching fresh copy of {localfilename}")
            utils.save_if_good_url(localfilename, urlwanted)
        else:
            logger.debug(f"Getting copy of {localfilename} if needed")
            utils.fetch_if_not_cached(localfilename, urlwanted)

    pdffiles = sorted(cache.files(subdir="ms/", glob_pattern="*.pdf"))

    # Header text as cleaned from the PDFs (including extraction glitches such
    # as "T ypes" and "Workforc e") mapped to standardized field names.
    headerfixes = {
        "": "blank_entry",
        "# Affected": "affected",
        "# Of Notices Received": "notices_received",
        "City": "city",
        "Company Name": "company",
        "Company Name (City) (County)": "company",
        "Company Name (City) (County) (Zip)": "company",
        "Company Name City (County)": "company",
        "Company Name City, (County)": "company",
        "Company Name, City (County)": "company",
        "Company Name, City, County": "company",
        "County": "county",
        "Date of Action": "date_effective",
        "Date of Notice": "date_notice",
        "Date of WARN Notice": "date_notice",
        "Event Number": "event_number",
        "NAICS CODE & Description": "naics",
        "NAICS CODE – Description": "naics",
        "Notices Received": "notices_received",
        "Number Of Notices Received": "notices_received",
        "Number Of Notices Received October 2024 – December 2024": "notices_received",
        "Number Affected": "affected",
        "Reason / Comments": "reason",
        "Reason – Comments": "reason",
        "Type of Action": "action_type",
        "Type of Action # Affected": "action_type",
        "T ypes of Notice": "notice_types",
        "T ypes of Notices Received": "notice_types",
        "Type of Notice": "notice_types",
        "Types of Notice": "notice_types",
        "Types of Notices": "notice_types",
        "Types of Notices Received": "notice_types",
        "Workforc e Area": "workforce_area",
        "Workforce Area": "workforce_area",
        "_int_accuracy": "_int_accuracy",
        "_int_data_items": "_int_data_items",
        "_int_page": "_int_page",
        "_int_pdf_filename": "_int_pdf_filename",
        "_int_raw_fields": "_int_raw_fields",
        "_int_table_number": "_int_table_number",
        "supplement_0": "supplement_0",
        "supplement_1": "supplement_1",
        "supplement_2": "supplement_2",
        "supplement_5": "affected",  # Only carries from 2025sq2
    }

    masterlist = []
    rowholder = []
    for pdffile in pdffiles:
        locallist, localrows = pdfrodent.parse_pdf(pdffile, headerfixes)
        masterlist.extend(locallist)
        rowholder.extend(localrows)

    # Identify all header elements, even in the ones we're about to remove.
    # The output is shaped like dict entries, with the value left blank.
    allheaders = {item for row in masterlist for item in row}
    text = "".join(f"\t\t'{item}': ,\n" for item in sorted(allheaders))
    # Header names can contain non-ASCII characters (e.g. en dashes), so pin the encoding.
    with open(Path(cache_dir) / "ms/allheaders.txt", "w", encoding="utf-8") as outfile:
        outfile.write(text)

    targetfilename = data_dir / "ms.csv"
    logger.debug(f"Found {len(masterlist):,} extracted rows from the PDFs.")
    cleaned = pdfrodent.drop_thin_rows(masterlist, 6)
    logger.debug(
        f"After filtering out thin rows, we have {len(cleaned):,} rows of data meeting standards."
    )
    # Only the rows that survived the thin-row filter are written out.
    utils.write_disparate_dict_rows_to_csv(targetfilename, cleaned)

    if want_debugging_file:
        with open(Path(cache_dir) / "ms/debugging.txt", "w") as outfile:
            for row in rowholder:
                outfile.write(json.dumps(row) + "\r\n")

    return targetfilename
warn/utils.py CHANGED
@@ -243,22 +243,40 @@ def write_dict_rows_to_csv(output_path, headers, rows, mode="w", extrasaction="r
243
243
  writer.writerow(row)
244
244
 
245
245
 
246
- def write_disparate_dict_rows_to_csv(output_path, rows, mode="w"):
246
+ def write_disparate_dict_rows_to_csv(
247
+ output_path, rows, mode="w", prefixes: None | list = None
248
+ ):
247
249
  """Write the provided list of dictionaries to the provided path as comma-separated values, while determining a header.
248
250
 
249
251
  Args:
250
252
  output_path (Path): the Path were the result will be saved
251
253
  rows (list): the list of dictionaries to be saved; can have disparate dict keys
252
254
  mode (str): the mode to be used when opening the file (default 'w')
255
+ prefixes(list|None): text strings that determine whether fields should arrive after other fields.
256
+ Send an empty list, [], to run without any prefixes.
257
+ Send None or don't send to use default prefixes of _int_ and int_
253
258
  """
259
+ if not prefixes:
260
+ prefixes = ["int_", "_int_"]
261
+ logger.debug(f"Writing {(len(rows)+1):,} rows to {output_path}")
254
262
  create_directory(output_path, is_file=True)
255
- headers: set = set() # Get all the potential header names
263
+ headers: list = [] # We want to preserve order, and set won't do it.
264
+ headerextras: list = [] # stuff that should be at the right of the field list
256
265
  for row in rows:
257
266
  for item in row:
258
- headers.add(item)
259
- headers = list(sorted(headers))
260
- logger.debug(f"Found {len(headers):,} header entries in list of dicts.")
261
- logger.debug(f"Writing {len(rows):,} rows to {output_path}")
267
+ if item not in headers and item not in headerextras:
268
+ prefixhere = False
269
+ for prefix in prefixes:
270
+ if item.startswith(prefix):
271
+ prefixhere = True
272
+ if prefixhere:
273
+ headerextras.append(item)
274
+ else:
275
+ headers.append(item)
276
+ logger.debug(
277
+ f"Found {(len(headers) + len(headerextras)):,} header entries in the supplied list of dicts."
278
+ )
279
+ headers.extend(headerextras)
262
280
  with open(output_path, mode, newline="") as outfile:
263
281
  # Create the writer object
264
282
  writer = csv.writer(outfile)
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: warn-scraper
3
- Version: 1.2.152.dev0
3
+ Version: 1.2.154.dev0
4
4
  Summary: Command-line interface for downloading WARN Act notices of qualified plant closings and mass layoffs from state government websites
5
5
  Author-email: Big Local News <biglocalnews@stanford.edu>
6
6
  License-Expression: Apache-2.0
@@ -2,7 +2,9 @@ warn/__init__.py,sha256=A07JFY1TyaPtVIndBa7IvTk13DETqIkLgRdk0A-MCoE,85
2
2
  warn/cache.py,sha256=QBSHycchvRTkOQfHptOtZeTYiPgLP383jS8MTiGln_c,5969
3
3
  warn/cli.py,sha256=ZqyJwICdHFkn2hEgbArj_upbElR9-TSDlYDqyEGeexE,2019
4
4
  warn/runner.py,sha256=oeGRybGwpnkQKlPzRMlKxhsDt1GN4PZoX-vUwrsPgos,1894
5
- warn/utils.py,sha256=95aNYviHGvcB4kP96qHnzs6uVTDesE3karvYq5eESsM,12259
5
+ warn/utils.py,sha256=-JF8DnSg-80CbCIswM-rtB0CWf9zSVU56iJNpRw3V-o,13086
6
+ warn/pdfrodent/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
7
+ warn/pdfrodent/pdfrodent.py,sha256=IajvUyzVuUlph7F3LqaPU0HxDCkHb8YfnP1js4vOoTs,14632
6
8
  warn/platforms/__init__.py,sha256=wIZRDf4tbTuC8oKM4ZrTAtwNgbtMQGzPXMwDYCFyrog,81
7
9
  warn/platforms/job_center/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
8
10
  warn/platforms/job_center/cache.py,sha256=yhA3sE46lNFg8vEewSoRYVByi0YSlkBiKm7qoSUiTdM,1868
@@ -32,6 +34,7 @@ warn/scrapers/md.py,sha256=hwgxXQnhyBWm8qF1dvxIThAX1MkrZbXLwRI9inO5t8g,4060
32
34
  warn/scrapers/me.py,sha256=q36F4yJ7hvZsLayA3uBS1romo4X3Qf-sEi2Y7LAQCi8,1172
33
35
  warn/scrapers/mi.py,sha256=Ppyawp4nbzSBODuzDKeqnO9_9do5MFwK4Y_f3uc6blE,5846
34
36
  warn/scrapers/mo.py,sha256=wnnwQAiVPwuheMqptMXZpyQdiKNghhKwTO-Bnh9oXoU,3492
37
+ warn/scrapers/ms.py,sha256=BZZoMw3TNtwzBeBsqKLsPznBcDjPaO49I5-yBU0e9AI,5502
35
38
  warn/scrapers/mt.py,sha256=t2MP4OCcuCEnrnvNgOu289P0eekZq4XaCK65qzgZX88,2457
36
39
  warn/scrapers/ne.py,sha256=JawuGJ3tCKvMd-N-p03gnltB4rol4QUJshMk2oyMPO4,4143
37
40
  warn/scrapers/nj.py,sha256=nwbMbeQuUJbYRVoyUyKZBmNqvqsXu3Habt-10r8DvZE,2230
@@ -51,9 +54,9 @@ warn/scrapers/va.py,sha256=7Nle7qL0VNPiE653XyaP9HQqSfuJFDRr2kEkjOqLvFM,11269
51
54
  warn/scrapers/vt.py,sha256=d-bo4WK2hkrk4BhCCmLpEovcoZltlvdIUB6O0uaMx5A,1186
52
55
  warn/scrapers/wa.py,sha256=UXdVtHZo_a-XfoiyOooTRfTb9W3PErSZdKca6SRORgs,4282
53
56
  warn/scrapers/wi.py,sha256=ClEzXkwZbop0W4fkQgsb5oHAPUrb4luUPGV-jOKwkcg,4855
54
- warn_scraper-1.2.152.dev0.dist-info/licenses/LICENSE,sha256=ZV-QHyqPwyMuwuj0lI05JeSjV1NyzVEk8Yeu7FPtYS0,585
55
- warn_scraper-1.2.152.dev0.dist-info/METADATA,sha256=7URJSlssMUA03pk1xFWJebi5HmfTOcAjBxgTqzHlf6s,1780
56
- warn_scraper-1.2.152.dev0.dist-info/WHEEL,sha256=aeYiig01lYGDzBgS8HxWXOg3uV61G9ijOsup-k9o1sk,91
57
- warn_scraper-1.2.152.dev0.dist-info/entry_points.txt,sha256=poh_oSweObGlBSs1_2qZmnTodlOYD0KfO7-h7W2UQIw,47
58
- warn_scraper-1.2.152.dev0.dist-info/top_level.txt,sha256=dZfms6N3kqVXufiPOo7YqOrAcUtYfNH_oyGvYUk9FB4,5
59
- warn_scraper-1.2.152.dev0.dist-info/RECORD,,
57
+ warn_scraper-1.2.154.dev0.dist-info/licenses/LICENSE,sha256=ZV-QHyqPwyMuwuj0lI05JeSjV1NyzVEk8Yeu7FPtYS0,585
58
+ warn_scraper-1.2.154.dev0.dist-info/METADATA,sha256=OtXPhDRnhpTYB_lgZY3y35ZY6IKTYZkZcTFtQeCmaf0,1780
59
+ warn_scraper-1.2.154.dev0.dist-info/WHEEL,sha256=aeYiig01lYGDzBgS8HxWXOg3uV61G9ijOsup-k9o1sk,91
60
+ warn_scraper-1.2.154.dev0.dist-info/entry_points.txt,sha256=poh_oSweObGlBSs1_2qZmnTodlOYD0KfO7-h7W2UQIw,47
61
+ warn_scraper-1.2.154.dev0.dist-info/top_level.txt,sha256=dZfms6N3kqVXufiPOo7YqOrAcUtYfNH_oyGvYUk9FB4,5
62
+ warn_scraper-1.2.154.dev0.dist-info/RECORD,,