PyPI - warn-scraper - Versions diffs - 1.2.151.dev0__py3-none-any.whl → 1.2.153.dev0__py3-none-any.whl - Mend

warn-scraper 1.2.151.dev0py3-none-any.whl → 1.2.153.dev0py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (11) hide show

warn/pdfrodent/__init__.py ADDED Viewed

File without changes

warn/pdfrodent/pdfrodent.py ADDED Viewed

@@ -0,0 +1,361 @@
+import json
+import logging
+import re
+import camelot  # pip install camelot-py==1.0.9 for now
+logger = logging.getLogger(__name__)
def clean_cell(text: str) -> str:
    """Normalize the whitespace of a single PDF cell.

    Args:
        text: raw cell text; ``None`` is tolerated and treated as empty.

    Returns:
        The text with every run of whitespace collapsed to one space and
        the ends trimmed, or ``""`` when ``text`` is ``None``.
    """
    return "" if text is None else re.sub(r"\s+", " ", text).strip()
def clean_row(row: list):
    """Return a new list holding the whitespace-normalized form of each cell.

    Args:
        row (list): cell values (strings or None) from one PDF table row

    Returns:
        list: one cleaned string per input cell, in the same order
    """
    return [clean_cell(cell) for cell in row]
def is_empty(row: list) -> bool:
    """Tell whether a row contains no populated (truthy) cells.

    Args:
        row: the row to check

    Returns:
        True if every cell is falsy (or the row has no cells), else False.
    """
    return not any(row)
def is_mostly_empty(row: list) -> bool:
    """Tell whether a row has at most two populated (truthy) cells.

    Such rows are treated by the parser as fragments of a neighbouring row
    rather than as complete rows.

    Args:
        row: the row to check

    Returns:
        True if two or fewer cells are truthy, else False.
    """
    populated = sum(1 for cell in row if cell)
    return populated <= 2
def has_content(value):
    """Check whether a value carries any content.

    Lists and dicts always count as content (even when empty), ``None`` never
    does, and any other value counts when its string form is non-blank.

    Args:
        value: the value to inspect

    Returns:
        bool: True if the value is considered populated, False otherwise
    """
    # isinstance, not `value is list`: the latter compares against the type
    # object itself and is never true for an actual list or dict.
    if isinstance(value, (list, dict)):
        return True
    if value is None:
        return False
    return bool(str(value).strip())


def count_data_items(row: dict, prefixes=None) -> int:
    """Count the populated fields of a row, ignoring internal bookkeeping fields.

    Args:
        row (dict): one data row, keyed by field name
        prefixes (list), optional: field-name prefixes that mark internal
            fields to leave out of the count. ``None`` (the default) uses
            ["int_", "_int_"]; pass an empty list to count every field.

    Returns:
        int: how many non-internal fields have content (see has_content)
    """
    # Only None selects the defaults, so an explicit [] really disables filtering.
    if prefixes is None:
        prefixes = ["int_", "_int_"]
    skip = tuple(prefixes)  # str.startswith accepts a tuple; an empty tuple matches nothing
    return sum(
        1
        for field, value in row.items()
        if not field.startswith(skip) and has_content(value)
    )


def drop_thin_rows(rows: list, cutnumber: int, prefixes=None):
    """Drop rows that have too few populated, non-internal fields.

    Args:
        rows (list): list of dicts, one per data row
        cutnumber (int): rows with this many populated fields or fewer are
            dropped, so cutnumber + 1 is the minimum needed to be kept
        prefixes (list), optional: field-name prefixes excluded from the
            count. ``None`` (the default) uses ["int_", "_int_"]; pass an
            empty list to count every field.

    Returns:
        list: the rows (same dict objects, original order) that pass the cut
    """
    if prefixes is None:
        prefixes = ["int_", "_int_"]
    return [
        row for row in rows if count_data_items(row, prefixes=prefixes) > cutnumber
    ]
# Design notes for the PDF row classifier, kept as a module-level string
# rather than a comment. Nothing in the visible part of this module reads
# this variable; it exists for human readers only.
internal_documentation_such_as_it_is = """
OK, this is going to be messy. The higher-level overview:
We get lists of strings from the PDF, an ostensible PDF row.
Some of these lists are going to be headers. The headers, of course, need to be detected initially.
And sometimes the headers show up as their own table, with nothing else.
If this is the case, they need to be applied as the headers to subsequent tables.
But headers can also repeat across pages, so we need to detect them.
To add to the fun, each of these rows from the PDF may be just part of another logical row,
from when cells are divided horizontally to hold multiple data points.
We need to detect those fragmentary lines, mostly by checking to see if most cells are empty.
If they're a fragment of a header, we need to track it somehow and build a structure to hold the fragment.
And remember header fragments may occur on multiple pages with multipage headers.
That means we need to build an initial structure to hold the headers, then skip some rows if we see the header again.
For non-header fragments, we need to append the data to the previous line in an appropriate data structure.
But wait! There's more!
PDF data tends to be really dirty, lots of junky white space.
Some people will use multiline data to show multiple data points in a single cell, such as Company name<newline>, City, State ZIP.
If we strip off white space, we're losing a way to segregate and process that data later. So we can't clean it up until later.
Unless it's for fragmentary rows, because we need to know that they're fragmentary and white space will wreck the count.
And of course lots of rows are entirely white space, just blank data rows left in a PDF. Those we just drop.
To sum up:
Just about every PDF row can be
An orphaned header, alone in the table
A full header row
A fragmentary header
A full data row
A fragmentary data row
A blank row
We need many little trackers to go through here and figure out what we're looking at.
We need code to clean up whitespace in cells and rows.
We need a function to delete rows with fewer than a certain number of data points (e.g., contents of a summary table).
We need a function that allows us to standardize header names.
We probably want code that tells us what PDF this is pulled from, on which row.
"""
def _patch_headers(rawheader: list, field_fixes: dict) -> list:
    """Clean raw header cells and rename them through field_fixes where a fix exists."""
    patchedheaders = []
    for item in clean_row(rawheader):
        if item in field_fixes:
            patchedheaders.append(field_fixes[item])
        else:
            logger.debug(
                f"New header type found: {item}, not in {' '.join(sorted(list(field_fixes.keys())))}"
            )
            patchedheaders.append(item)
    return patchedheaders


def parse_pdf(pdffile: str, field_fixes: dict | None = None):
    """Parse a PDF file to extract data from tables.

    Every table row is classified as an orphaned header (a one-row table whose
    header is carried to the next table), an initial or repeated header, a
    blank row, a fragment (at most two populated cells) or a regular data row.
    Fragments seen after data are merged into the previous data row.

    Args:
        pdffile (str): path of the PDF to read
        field_fixes (dict), optional: maps cleaned header text to the field
            name to use instead; headers without an entry are kept as cleaned

    Returns:
        tuple: (filelist, filerowholder)
            filelist: list of dicts, one per data row, keyed by header name
                plus ``_int_*`` bookkeeping fields
            filerowholder: debugging trail of the raw rows and how each one
                was classified
    """
    if not field_fixes:
        logger.debug(
            "No 'field_fixes' variable submitted to pdfrodent.parse_pdf function."
        )
        field_fixes = {}
    else:
        logger.debug(f"{len(field_fixes):,} field_fixes to be used to clean headers.")
    filelist = []
    filerowholder = []
    logger.debug(f"Opening {pdffile} for PDF parsing")
    tables = camelot.read_pdf(pdffile, pages="all")
    # These two survive across tables: a one-row table supplies the header
    # for the table(s) after it until a real data row has been seen.
    orphanedheader = False
    orphanholder = None
    for tableindex, table in enumerate(tables):
        locallist: list = []
        logger.debug(f"Processing table {tableindex} of {pdffile}")
        filerowholder.append(f"Processing table {tableindex} of {pdffile}")
        rawheader = None
        headerfirst = []
        headersupplement: dict = {}
        isheader = True
        seendata = False
        logger.debug(
            f"Processing table {tableindex} with {len(table.rows)} of {pdffile}"
        )
        # If the table has only one row, it's a stray header and should be used with the next table.
        if len(table.rows) == 1:
            logger.debug("\tOrphaned header detected!")
            filerowholder.append("\tOrphaned header detected!")
            orphanedheader = True
            rawheader = table.data[0]
            orphanholder = {
                "rawheader": rawheader,
                "patchedheaders": _patch_headers(rawheader, field_fixes),
            }
            logger.debug(f"{orphanholder}")
            filerowholder.append(f"{orphanholder}")
        # If there are multiple rows, there are a bunch of possibilities we need to poke ...
        else:
            # If we have a header from a one-row table, prepare to use the orphaned header
            if orphanedheader:
                rawheader = orphanholder["rawheader"]  # type: ignore
                headerfirst = orphanholder["patchedheaders"]  # type: ignore
            for rowindex, row in enumerate(table.data):
                filerowholder.append(row)
                # Whitespace-normalized copy, used only to classify the row.
                cleanrow = clean_row(row)
                # If it's the first row in a table and we don't have an orphaned header,
                # it's an index row
                if rowindex == 0 and not orphanedheader:
                    rawheader = row
                    headerfirst = _patch_headers(rawheader, field_fixes)
                    isheader = True
                    filerowholder.append("\tIndex row!")
                elif row == rawheader:  # Later instance of a page header
                    isheader = True
                    filerowholder.append("\tRepeated header")
                # Drop blank rows entirely
                elif is_empty(cleanrow):
                    filerowholder.append("\tEmpty row")
                # Handle fragmentary records
                elif is_mostly_empty(cleanrow):
                    filerowholder.append("\tMostly empty row!")
                    if not seendata:  # Is this part of the initial header?
                        filerowholder.append("\tMostly empty row, haven't seen data")
                        # NOTE(review): these string keys never match the integer
                        # cellindex lookup used for data fragments below, so such
                        # fragments fall back to "supplement_<index>" names.
                        # Left unchanged because callers may map those fallback
                        # names through field_fixes.
                        for cellindex, cleancell in enumerate(cleanrow):
                            if len(cleancell) > 0:  # If we have good data
                                fieldname = f"supplement{cellindex}"
                                headersupplement[fieldname] = None  # type: ignore
                        isheader = False
                        orphanedheader = False
                    elif isheader:  # Supplement to a header on a latter page
                        filerowholder.append(
                            "\tMostly empty row, seems to be appending to a header"
                        )
                        for cellindex, cleancell in enumerate(cleanrow):
                            if len(cleancell) > 0:  # If we have good data
                                if cellindex not in headersupplement:
                                    # Store the header text (renamed like any other
                                    # header), not the dict itself: a dict value here
                                    # would later be used as a dict key and raise
                                    # TypeError.
                                    headersupplement[cellindex] = field_fixes.get(
                                        cleancell, cleancell
                                    )
                                    logger.debug(
                                        f"Added {cleancell} to headersupplement, which now holds: {headersupplement}"
                                    )
                        isheader = False
                    else:  # Not a header, have seen data; must be a regular row supplement
                        orphanedheader = False
                        isheader = False
                        filerowholder.append(
                            "\tMostly empty row, seems to be detailed info for a regular row"
                        )
                        for cellindex, cleancell in enumerate(cleanrow):
                            if len(cleancell) > 0:  # If we have good data
                                if cellindex in headersupplement:
                                    fieldname = headersupplement[cellindex]  # type: ignore
                                else:
                                    fieldname = f"supplement_{cellindex}"
                                    logger.warning(
                                        f"Found {fieldname} as {cleancell} but not located in supplemental headers: {headersupplement}"
                                    )
                                    if fieldname in field_fixes:
                                        logger.debug(
                                            f"Shifting cell with {fieldname} to {field_fixes[fieldname]}"
                                        )
                                        fieldname = field_fixes[fieldname]
                                # seendata is only set after a row was appended to
                                # locallist, so locallist[-1] exists here.
                                locallist[-1][fieldname] = cleancell
                else:
                    # It's not an orphaned header
                    # It's not the initial header
                    # It's not a supplemental header
                    # It's not an empty row
                    # It's not a supplemental data row
                    # We ... actually have a regular data row here.
                    orphanedheader = False
                    filerowholder.append("\tSeems to be a regular row.")
                    isheader = False
                    seendata = True
                    line: dict = {}  # rows in, lines out
                    for cellindex, cleancell in enumerate(cleanrow):
                        line[headerfirst[cellindex]] = cleancell
                    filerowholder.append(f"\t\t{line}")
                    locallist.append(line)
            report = table.parsing_report
            # Bare file name, whichever path separator was used.
            pdfname = pdffile.split("/")[-1].split("\\")[-1]
            for entry in locallist:
                # Insertion order matters: _int_raw_fields snapshots every value
                # set so far, including the four bookkeeping fields above it.
                entry["_int_accuracy"] = report["accuracy"]
                entry["_int_pdf_filename"] = pdfname
                entry["_int_page"] = report["page"]
                entry["_int_table_number"] = report["order"]
                entry["_int_raw_fields"] = json.dumps(list(entry.values()))
                entry["_int_data_items"] = count_data_items(entry)  # type: ignore
                if "Event Number" in entry:
                    entry["Event Number"] = entry["Event Number"].replace("\n", "")
        filelist.extend(locallist)
    return (filelist, filerowholder)

warn/scrapers/al.py CHANGED Viewed

@@ -32,11 +32,15 @@ def scrape(
     Returns: the Path where the file is written
     """
     output_csv = data_dir / "al.csv"
-    page = utils.get_url("https://www.madeinalabama.com/warn-list/")
+    # page = utils.get_url("https://www.madeinalabama.com/warn-list/")
+    # URL change in June 2026, maybe led to a HTTP 415 error
+    page = utils.get_url("https://workforce.alabama.gov/warn-list/")
     # can't see 2020 listings when I open web page, but they are on the summary in the google search
     soup = BeautifulSoup(page.text, "html.parser")
     table = soup.find_all("table")  # output is list-type
     table_rows = table[0].find_all("tr")
+    logger.debug(f"{len(table_rows):,} total table rows (including header) found")
     # Handle the header
     raw_header = table_rows.pop(0)
     header_row = _extract_fields_from_row(raw_header, "th")

warn/scrapers/ms.py ADDED Viewed

@@ -0,0 +1,150 @@
+import json
+import logging
+from pathlib import Path
+from pyquery import PyQuery as pq
+from warn.pdfrodent import pdfrodent as pdfrodent
+from .. import utils
+from ..cache import Cache
# Module-level scraper metadata.
# NOTE(review): presumably consumed by project tooling outside this file; confirm.
__authors__ = ["Ash1R", "stucka"]
__tags__ = ["pdf"]
# scrape() reads __source__["url"] as the index page to crawl for PDF links.
__source__ = {
    "name": "Mississippi Department of Employment Security",
    "url": "https://mdes.ms.gov/information-center/warn-information/",
}
logger = logging.getLogger(__name__)
# When true, scrape() also writes the parser's row-classification trail to
# ms/debugging.txt under the cache directory.
want_debugging_file = True
def scrape(
    data_dir: Path = utils.WARN_DATA_DIR,
    cache_dir: Path = utils.WARN_CACHE_DIR,
) -> Path:
    """
    Scrape data from Mississippi.

    Downloads the PDFs linked from the source page, parses their tables with
    pdfrodent, drops rows with too few populated fields and writes the rest
    to ms.csv.

    Keyword arguments:
    data_dir -- the Path where the result will be saved (default WARN_DATA_DIR)
    cache_dir -- the Path where results can be cached (default WARN_CACHE_DIR)

    Returns: the Path where the file is written
    """
    # Local import so this block brings its own dependency into scope.
    from urllib.parse import urljoin

    cache = Cache(cache_dir)
    indexurl = __source__["url"]
    html = utils.get_url(indexurl).text
    cache.write("ms/index.html", html)
    content = pq(html)("div#page_content")
    anchors = pq(content)("a")

    # Parse HTML to identify relevant PDFs
    urlswanted = []
    for anchor in anchors:
        href = pq(anchor).attr("href")
        if not href:  # anchors without an href (e.g. named anchors) yield None
            continue
        # urljoin leaves absolute links alone and resolves relative ones
        # against the index page.
        remoteurl = urljoin(indexurl, href)
        if remoteurl.endswith(".pdf") and not remoteurl.endswith("map.pdf"):
            urlswanted.append(remoteurl)

    # Get the files. The five first-listed files, we want fresh.
    # That should cover every quarter in the latest year, and one quarter of the previous year, at least.
    for i, urlwanted in enumerate(urlswanted):
        basefilename = urlwanted.split("/")[-1]
        localfilename = cache_dir / f"ms/{basefilename}"
        if i <= 4:  # Get the five newest files to ensure proper overlap
            logger.debug(f"Fetching fresh copy of {localfilename}")
            utils.save_if_good_url(localfilename, urlwanted)
        else:
            logger.debug(f"Getting copy of {localfilename} if needed")
            utils.fetch_if_not_cached(localfilename, urlwanted)

    pdffiles = sorted(cache.files(subdir="ms/", glob_pattern="*.pdf"))

    # Maps header text as extracted from the PDFs (including extraction
    # glitches such as "T ypes") to the output field names.
    headerfixes = {
        "": "blank_entry",
        "# Affected": "affected",
        "# Of Notices Received": "notices_received",
        "City": "city",
        "Company Name": "company",
        "Company Name (City) (County)": "company",
        "Company Name (City) (County) (Zip)": "company",
        "Company Name City (County)": "company",
        "Company Name City, (County)": "company",
        "Company Name, City (County)": "company",
        "Company Name, City, County": "company",
        "County": "county",
        "Date of Action": "date_effective",
        "Date of Notice": "date_notice",
        "Date of WARN Notice": "date_notice",
        "Event Number": "event_number",
        "NAICS CODE & Description": "naics",
        "NAICS CODE – Description": "naics",
        "Notices Received": "notices_received",
        "Number Of Notices Received": "notices_received",
        "Number Of Notices Received October 2024 – December 2024": "notices_received",
        "Number Affected": "affected",
        "Reason / Comments": "reason",
        "Reason – Comments": "reason",
        "Type of Action": "action_type",
        "Type of Action # Affected": "action_type",
        "T ypes of Notice": "notice_types",
        "T ypes of Notices Received": "notice_types",
        "Type of Notice": "notice_types",
        "Types of Notice": "notice_types",
        "Types of Notices": "notice_types",
        "Types of Notices Received": "notice_types",
        "Workforc e Area": "workforce_area",
        "Workforce Area": "workforce_area",
        "_int_accuracy": "_int_accuracy",
        "_int_data_items": "_int_data_items",
        "_int_page": "_int_page",
        "_int_pdf_filename": "_int_pdf_filename",
        "_int_raw_fields": "_int_raw_fields",
        "_int_table_number": "_int_table_number",
        "supplement_0": "supplement_0",
        "supplement_1": "supplement_1",
        "supplement_2": "supplement_2",
        "supplement_5": "affected",  # Only carries from 2025sq2
    }

    masterlist = []
    rowholder = []
    for pdffile in pdffiles:
        locallist, localrows = pdfrodent.parse_pdf(pdffile, headerfixes)
        masterlist.extend(locallist)
        rowholder.extend(localrows)

    # Identify all header elements, even in the ones we're about to remove.
    # The listing is written as a paste-ready skeleton for new headerfixes entries.
    allheaders = {item for row in masterlist for item in row}
    text = "".join(f"\t\t'{item}': ,\n" for item in sorted(allheaders))
    # Explicit UTF-8: extracted header names can contain non-ASCII dashes.
    with open(
        Path(cache_dir) / "ms/allheaders.txt", "w", encoding="utf-8"
    ) as outfile:
        outfile.write(text)

    targetfilename = data_dir / "ms.csv"
    logger.debug(f"Found {len(masterlist):,} extracted rows from the PDFs.")
    # Keep only rows with more than 6 populated, non-internal fields.
    cleaned = pdfrodent.drop_thin_rows(masterlist, 6)
    logger.debug(
        f"After filtering out thin rows, we have {len(cleaned):,} rows of data meeting standards."
    )
    utils.write_disparate_dict_rows_to_csv(targetfilename, cleaned)

    if want_debugging_file:
        with open(
            Path(cache_dir) / "ms/debugging.txt", "w", encoding="utf-8"
        ) as outfile:
            for row in rowholder:
                outfile.write(json.dumps(row) + "\r\n")
    return targetfilename

warn/utils.py CHANGED Viewed

@@ -213,7 +213,7 @@ def write_rows_to_csv(output_path: Path, rows: list, mode="w"):
         mode (str): the mode to be used when opening the file (default 'w')
     """
     create_directory(output_path, is_file=True)
-    logger.debug(f"Writing {len(rows)} rows to {output_path}")
+    logger.debug(f"Writing {len(rows):,} rows to {output_path}")
     with open(output_path, mode, newline="", encoding="utf-8") as f:
         writer = csv.writer(f)
         writer.writerows(rows)
@@ -230,7 +230,7 @@ def write_dict_rows_to_csv(output_path, headers, rows, mode="w", extrasaction="r
         extrasaction (str): what to do if the if a field isn't in the headers (default 'raise')
     """
     create_directory(output_path, is_file=True)
-    logger.debug(f"Writing {len(rows)} rows to {output_path}")
+    logger.debug(f"Writing {len(rows):,} rows to {output_path}")
     with open(output_path, mode, newline="") as f:
         # Create the writer object
         writer = csv.DictWriter(f, fieldnames=headers, extrasaction=extrasaction)
@@ -243,22 +243,40 @@ def write_dict_rows_to_csv(output_path, headers, rows, mode="w", extrasaction="r
             writer.writerow(row)
-def write_disparate_dict_rows_to_csv(output_path, rows, mode="w"):
+def write_disparate_dict_rows_to_csv(
+    output_path, rows, mode="w", prefixes: None | list = None
+):
     """Write the provided list of dictionaries to the provided path as comma-separated values, while determining a header.
     Args:
         output_path (Path): the Path were the result will be saved
         rows (list): the list of dictionaries to be saved; can have disparate dict keys
         mode (str): the mode to be used when opening the file (default 'w')
+        prefixes(list|None): text strings that determine whether fields should arrive after other fields.
+            Send an empty list, [], to run without any prefixes.
+            Send None or don't send to use default prefixes of _int_ and int_
     """
+    if not prefixes:
+        prefixes = ["int_", "_int_"]
+    logger.debug(f"Writing {(len(rows)+1):,} rows to {output_path}")
     create_directory(output_path, is_file=True)
-    headers: set = set()  # Get all the potential header names
+    headers: list = []  # We want to preserve order, and set won't do it.
+    headerextras: list = []  # stuff that should be at the right of the field list
     for row in rows:
         for item in row:
-            headers.add(item)
-    headers = list(sorted(headers))
-    logger.debug(f"Found {len(headers):,} header entries in list of dicts.")
-    logger.debug(f"Writing {len(rows)} rows to {output_path}")
+            if item not in headers and item not in headerextras:
+                prefixhere = False
+                for prefix in prefixes:
+                    if item.startswith(prefix):
+                        prefixhere = True
+                if prefixhere:
+                    headerextras.append(item)
+                else:
+                    headers.append(item)
+    logger.debug(
+        f"Found {(len(headers) + len(headerextras)):,} header entries in the supplied list of dicts."
+    )
+    headers.extend(headerextras)
     with open(output_path, mode, newline="") as outfile:
         # Create the writer object
         writer = csv.writer(outfile)

{warn_scraper-1.2.151.dev0.dist-info → warn_scraper-1.2.153.dev0.dist-info}/METADATA RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: warn-scraper
-Version: 1.2.151.dev0
+Version: 1.2.153.dev0
 Summary: Command-line interface for downloading WARN Act notices of qualified plant closings and mass layoffs from state government websites
 Author-email: Big Local News <biglocalnews@stanford.edu>
 License-Expression: Apache-2.0

{warn_scraper-1.2.151.dev0.dist-info → warn_scraper-1.2.153.dev0.dist-info}/RECORD RENAMED Viewed

@@ -2,7 +2,9 @@ warn/__init__.py,sha256=A07JFY1TyaPtVIndBa7IvTk13DETqIkLgRdk0A-MCoE,85
 warn/cache.py,sha256=QBSHycchvRTkOQfHptOtZeTYiPgLP383jS8MTiGln_c,5969
 warn/cli.py,sha256=ZqyJwICdHFkn2hEgbArj_upbElR9-TSDlYDqyEGeexE,2019
 warn/runner.py,sha256=oeGRybGwpnkQKlPzRMlKxhsDt1GN4PZoX-vUwrsPgos,1894
-warn/utils.py,sha256=eF7lI2ObsagxBYPcOZc--DJp_vxA3Jl4nCcBs4C8zmc,12253
+warn/utils.py,sha256=-JF8DnSg-80CbCIswM-rtB0CWf9zSVU56iJNpRw3V-o,13086
+warn/pdfrodent/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+warn/pdfrodent/pdfrodent.py,sha256=IajvUyzVuUlph7F3LqaPU0HxDCkHb8YfnP1js4vOoTs,14632
 warn/platforms/__init__.py,sha256=wIZRDf4tbTuC8oKM4ZrTAtwNgbtMQGzPXMwDYCFyrog,81
 warn/platforms/job_center/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 warn/platforms/job_center/cache.py,sha256=yhA3sE46lNFg8vEewSoRYVByi0YSlkBiKm7qoSUiTdM,1868
@@ -11,7 +13,7 @@ warn/platforms/job_center/urls.py,sha256=IWhpuzN_xcNdHh23GbZPGvuHCsMcmb03qx3pRn1
 warn/platforms/job_center/utils.py,sha256=HdUKgKirmpPP7e4Cu_ZyB3zPVS_p-_ylo-lXFhxK2QM,5696
 warn/scrapers/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 warn/scrapers/ak.py,sha256=h7BYMTV0whwWAPhbzVDVKMMoVCFphKly70aiTHabPq4,1847
-warn/scrapers/al.py,sha256=D0rT9GQ0vwfkRuveVAt-Po-T6b2TI1EPGeLOBy2m3_M,2240
+warn/scrapers/al.py,sha256=XSDEGC7F6_3GZ2m_uSiIG-1v8jMNH9pw_wNUCZyaMK0,2460
 warn/scrapers/az.py,sha256=elGbue01Gjf_DQ66Wy9qqGIOJsiY-KIKJOVeft8pCXg,1447
 warn/scrapers/ca.py,sha256=VQOfjHXPCc-jYwh-EPGVVfnzvXB7pdmCt2uJ6QnMPRM,8600
 warn/scrapers/co.py,sha256=83OdikIrWGxt22mlI-_zLSNqJg1NO5C2Xjm3FF6DPYY,18252
@@ -32,6 +34,7 @@ warn/scrapers/md.py,sha256=hwgxXQnhyBWm8qF1dvxIThAX1MkrZbXLwRI9inO5t8g,4060
 warn/scrapers/me.py,sha256=q36F4yJ7hvZsLayA3uBS1romo4X3Qf-sEi2Y7LAQCi8,1172
 warn/scrapers/mi.py,sha256=Ppyawp4nbzSBODuzDKeqnO9_9do5MFwK4Y_f3uc6blE,5846
 warn/scrapers/mo.py,sha256=wnnwQAiVPwuheMqptMXZpyQdiKNghhKwTO-Bnh9oXoU,3492
+warn/scrapers/ms.py,sha256=BZZoMw3TNtwzBeBsqKLsPznBcDjPaO49I5-yBU0e9AI,5502
 warn/scrapers/mt.py,sha256=t2MP4OCcuCEnrnvNgOu289P0eekZq4XaCK65qzgZX88,2457
 warn/scrapers/ne.py,sha256=JawuGJ3tCKvMd-N-p03gnltB4rol4QUJshMk2oyMPO4,4143
 warn/scrapers/nj.py,sha256=nwbMbeQuUJbYRVoyUyKZBmNqvqsXu3Habt-10r8DvZE,2230
@@ -51,9 +54,9 @@ warn/scrapers/va.py,sha256=7Nle7qL0VNPiE653XyaP9HQqSfuJFDRr2kEkjOqLvFM,11269
 warn/scrapers/vt.py,sha256=d-bo4WK2hkrk4BhCCmLpEovcoZltlvdIUB6O0uaMx5A,1186
 warn/scrapers/wa.py,sha256=UXdVtHZo_a-XfoiyOooTRfTb9W3PErSZdKca6SRORgs,4282
 warn/scrapers/wi.py,sha256=ClEzXkwZbop0W4fkQgsb5oHAPUrb4luUPGV-jOKwkcg,4855
-warn_scraper-1.2.151.dev0.dist-info/licenses/LICENSE,sha256=ZV-QHyqPwyMuwuj0lI05JeSjV1NyzVEk8Yeu7FPtYS0,585
-warn_scraper-1.2.151.dev0.dist-info/METADATA,sha256=AfbK5daYq_1v82cikgTIXPS7TsXjZgksMWwkDzahI0o,1780
-warn_scraper-1.2.151.dev0.dist-info/WHEEL,sha256=aeYiig01lYGDzBgS8HxWXOg3uV61G9ijOsup-k9o1sk,91
-warn_scraper-1.2.151.dev0.dist-info/entry_points.txt,sha256=poh_oSweObGlBSs1_2qZmnTodlOYD0KfO7-h7W2UQIw,47
-warn_scraper-1.2.151.dev0.dist-info/top_level.txt,sha256=dZfms6N3kqVXufiPOo7YqOrAcUtYfNH_oyGvYUk9FB4,5
-warn_scraper-1.2.151.dev0.dist-info/RECORD,,
+warn_scraper-1.2.153.dev0.dist-info/licenses/LICENSE,sha256=ZV-QHyqPwyMuwuj0lI05JeSjV1NyzVEk8Yeu7FPtYS0,585
+warn_scraper-1.2.153.dev0.dist-info/METADATA,sha256=wEonbrS1LWIOVFTuLq75_rlLbDGb2g3-7w30N1x_JAc,1780
+warn_scraper-1.2.153.dev0.dist-info/WHEEL,sha256=aeYiig01lYGDzBgS8HxWXOg3uV61G9ijOsup-k9o1sk,91
+warn_scraper-1.2.153.dev0.dist-info/entry_points.txt,sha256=poh_oSweObGlBSs1_2qZmnTodlOYD0KfO7-h7W2UQIw,47
+warn_scraper-1.2.153.dev0.dist-info/top_level.txt,sha256=dZfms6N3kqVXufiPOo7YqOrAcUtYfNH_oyGvYUk9FB4,5
+warn_scraper-1.2.153.dev0.dist-info/RECORD,,

{warn_scraper-1.2.151.dev0.dist-info → warn_scraper-1.2.153.dev0.dist-info}/WHEEL RENAMED Viewed

File without changes

{warn_scraper-1.2.151.dev0.dist-info → warn_scraper-1.2.153.dev0.dist-info}/entry_points.txt RENAMED Viewed

File without changes

{warn_scraper-1.2.151.dev0.dist-info → warn_scraper-1.2.153.dev0.dist-info}/licenses/LICENSE RENAMED Viewed

File without changes

{warn_scraper-1.2.151.dev0.dist-info → warn_scraper-1.2.153.dev0.dist-info}/top_level.txt RENAMED Viewed

File without changes

warn-scraper 1.2.151.dev0__py3-none-any.whl → 1.2.153.dev0__py3-none-any.whl

warn-scraper 1.2.151.dev0py3-none-any.whl → 1.2.153.dev0py3-none-any.whl