PyPI - zipcode-features - Versions diffs - 0.0.2__py3-none-any.whl → 0.0.5__py3-none-any.whl - Mend

zipcode-features 0.0.2py3-none-any.whl → 0.0.5py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (8) hide show

zipcode_features/__init__.py CHANGED Viewed

@@ -1,9 +1,44 @@
-__version__ = '0.0.2'
+__version__ = '0.0.5'
 import zipcodes
-from uszipcode.search import SearchEngine
+from zipcode3.search import SearchEngine
 import pandas as pd
+import json
+def zipcode_mapper(x):
+    if x["ZIP_len"] == 3:
+        return "00" + x["ZIP"]
+    elif x["ZIP_len"] == 4:
+        return "0" + x["ZIP"]
+    else:
+        return x["ZIP"]
+def _get_zip_to_cbsa_code() -> dict:
+    """
+    This method gets a mapping from zipcode to cbsa code
+    mapping is of the form:
+    {"zip code": "cbsa code"}
+    """
+    df = pd.read_csv("CBSA_ZIP_122025.csv", dtype={'ZIP': str, "CBSA": str})
+    df["ZIP_len"] = df["ZIP"].apply(lambda x: len(x))
+    df["ZIP"] = df.apply(zipcode_mapper, axis=1)
+    return df[["ZIP", "CBSA"]].to_dict()
+def _get_cbsa_code_to_cbsa_name() -> dict:
+    """
+    This method gets a mapping from cbsa code to name
+    {cbsa code: cbsa name}
+    """
+    code_to_name = json.load(open("cbsa_codes.json"))
+    df = pd.DataFrame(columns=["code", "name"])
+    df["name"] = code_to_name.values()
+    df["code"] = code_to_name.keys()
+    df["name"] = df["name"].str.replace(" -", "-")
+    df["name"] = df["name"].str.split().str.join(' ')
+    return df.to_dict()
 def us_get_demographics(state: str, city: str = None, zip_list: list = None) -> pd.DataFrame:
     """
     This gets demographic information for associated with zipcodes in the United States of America.
@@ -43,4 +78,10 @@ def us_get_demographics(state: str, city: str = None, zip_list: list = None) ->
         tmp_dict = zipcode_and_demo[index][1].to_dict()
         tmp_dict["zip_code"] = zipcode_and_demo[index][0]
         demographics.append(tmp_dict)
-    return pd.DataFrame(demographics)
+    df = pd.DataFrame(demographics)
+    zip_to_cbsa = _get_zip_to_cbsa_code()
+    df["cbsa"] = df["zip_code"].map(zip_to_cbsa)
+    cbsa_code_to_name = _get_cbsa_code_to_cbsa_name()
+    df["cbsa_name"] = df["cbsa"].map(cbsa_code_to_name)
+    return df

zipcode_features-0.0.5.dist-info/METADATA ADDED Viewed

@@ -0,0 +1,203 @@
+Metadata-Version: 2.4
+Name: zipcode_features
+Version: 0.0.5
+Summary: A tool to get features based on census data from zipcodes
+Home-page: https://github.com/EricSchles/zipcode_features
+Author: Eric Schles
+Author-email: ericschles@gmail.com
+License: MIT
+Classifier: License :: OSI Approved :: MIT License
+Classifier: Programming Language :: Python :: 3
+Classifier: Programming Language :: Python :: 3.6
+Classifier: Programming Language :: Python :: 3.7
+Classifier: Programming Language :: Python :: 3.8
+Classifier: Programming Language :: Python :: 3.9
+Classifier: Programming Language :: Python :: 3.10
+Classifier: Programming Language :: Python :: 3.11
+Description-Content-Type: text/markdown
+License-File: LICENSE
+Requires-Dist: zipcodes
+Requires-Dist: pandas
+Requires-Dist: zipcode3
+Dynamic: author
+Dynamic: author-email
+Dynamic: classifier
+Dynamic: description
+Dynamic: description-content-type
+Dynamic: home-page
+Dynamic: license
+Dynamic: license-file
+Dynamic: requires-dist
+Dynamic: summary
+# zipcode features
+similar to [uszipcode-project](https://github.com/EricSchles/uszipcode-project)
+## Getting CBSA mapping
+If you need CBSA data you can append it to the dataframe with the following example:
+```python
+from zipcode_features import us_get_demographics
+import pandas as pd
+def _get_cbsa_data():
+    return pd.read_excel(
+        "https://github.com/EricSchles/zipcode_features/raw/refs/heads/main/zipcode_features/CBSA_ZIP_122025.xlsx",
+        sheet_name='Export Worksheet'
+    )[["CBSA", "ZIP"]]
+demo = us_get_demographics(state="NY")
+cbsa_zip_map = _get_cbsa_data()
+df = pd.merge(demo, cbsa_zip_map, how="left", left_on="zipcode", right_on="ZIP")
+```
+The human-readable CBSA names are available [here](https://www2.census.gov/programs-surveys/cps/methodology/2015%20Geography%20Cover.pdf).
+Here's a python script to parse them:
+```python
+import urllib.request
+import PyPDF2
+import json
+import re
+import io
+def fetch_cbsa_to_json():
+    url = "https://www2.census.gov/programs-surveys/cps/methodology/2015%20Geography%20Cover.pdf"
+    print("Downloading Census PDF...")
+    # Using a User-Agent to ensure the request isn't blocked by the server
+    req = urllib.request.Request(url, headers={'User-Agent': 'Mozilla/5.0'})
+    try:
+        response = urllib.request.urlopen(req)
+        pdf_bytes = io.BytesIO(response.read())
+    except Exception as e:
+        print(f"Failed to download PDF: {e}")
+        return
+    print("Parsing PDF...")
+    reader = PyPDF2.PdfReader(pdf_bytes)
+    cbsa_mapping = {}
+    # Regular expression to match a 5-digit FIPS/CBSA code followed by the area name
+    # Example match: "11460 Ann Arbor, MI"
+    pattern = re.compile(r'\b(\d{5})\s+(.+?)(?=\s+\d{5}|\n|$)')
+    for page in reader.pages:
+        text = page.extract_text()
+        if text:
+            matches = pattern.findall(text)
+            for code, name in matches:
+                # Clean up any trailing spaces or artifacts
+                clean_name = name.strip()
+                # Exclude standalone numbers or random headers that might get caught
+                if len(clean_name) > 2 and not clean_name.isdigit():
+                    cbsa_mapping[code] = clean_name
+    print(f"Extracted {len(cbsa_mapping)} CBSA codes.")
+    # Save the mapping to a JSON file
+    output_file = 'cbsa_codes.json'
+    with open(output_file, 'w', encoding='utf-8') as f:
+        json.dump(cbsa_mapping, f, indent=4)
+    print(f"Successfully saved to {output_file}")
+if __name__ == "__main__":
+    fetch_cbsa_to_json()
+```
+Here's a working example for using this with the above:
+```python
+import requests
+from zipcode_features import us_get_demographics
+import pandas as pd
+def _get_cbsa_data():
+    return pd.read_excel(
+        "https://github.com/EricSchles/zipcode_features/raw/refs/heads/main/zipcode_features/CBSA_ZIP_122025.xlsx",
+        sheet_name='Export Worksheet'
+    )[["CBSA", "ZIP"]]
+demo = us_get_demographics(state="NY")
+cbsa_zip_map = _get_cbsa_data()
+df = pd.merge(demo, cbsa_zip_map, how="left", left_on="zipcode", right_on="ZIP")
+df = df.drop("ZIP", axis=1)
+mapping = requests.get("https://raw.githubusercontent.com/EricSchles/zipcode_features/refs/heads/main/zipcode_features/cbsa_codes.json").json()
+df["cbsa_name"] = df["CBSA"].map(mapping)
+df = df.drop("CBSA", axis=1)
+```
+## Adding County
+```python
+from zipcode_features import us_get_demographics
+import pandas as pd
+def _get_fips_data():
+    df = pd.read_excel(
+        "https://github.com/EricSchles/zipcode_features/raw/refs/heads/main/zipcode_features/ZIP_COUNTY_122025.xlsx",
+	dtype={'ZIP': 'str'},
+        sheet_name='Export Worksheet'
+    )[["COUNTY", "ZIP"]]
+    df["COUNTY"] = df['COUNTY'].astype(str)
+    return df.dropna()
+demo = us_get_demographics(state="NY")
+fips_zip_map = _get_fips_data()
+df = pd.merge(demo, fips_zip_map, how="left", left_on="zipcode", right_on="ZIP")
+df = df.drop("ZIP", axis=1)
+df = df.dropna()
+```
+## Adding Regional Prices
+```bash
+python -m pip install beaapi us
+```
+```python
+from zipcode_features import us_get_demographics
+import pandas as pd
+import beaapi
+import us
+df = us_get_demographics(state="NY")
+# get your key here: https://apps.bea.gov/API/signup/
+beakey = ""
+dataset="Regional"
+table = "SARPP"
+regional_cpi = beaapi.get_data(
+    userid=beakey,
+    method='GetData',
+    datasetname=dataset, # National Income and Product Accounts
+    tablename=table, # Table 1.1.1
+    GeoFips="STATE",
+    LineCode="1",
+    ResultFormat="json"
+    #Frequency='A',      # Annual data
+)[["GeoName", "DataValue"]]
+regional_cpi = regional_cpi[regional_cpi["GeoName"] != "United States"]
+regional_cpi["year"] = ["2020", "2021", "2022", "2023", "2024"] * 51
+abbreviations_map = us.states.mapping('name', 'abbr')
+regional_cpi["state"] = regional_cpi["GeoName"].map(abbreviations_map)
+regional_cpi["cpi"] = regional_cpi["DataValue"]
+regional_cpi = regional_cpi.drop("DataValue", axis=1)
+regional_cpi = regional_cpi[regional_cpi["year"] == "2024"]
+regional_cpi["cpi_year"] = regional_cpi["year"]
+regional_cpi.drop("year", axis=1)
+df = pd.merge(df, regional_cpi, how='left', on="state")
+df["regional_cpi"] = df["cpi"]
+df = df.drop("cpi", axis=1)
+```

zipcode_features-0.0.5.dist-info/RECORD ADDED Viewed

@@ -0,0 +1,6 @@
+zipcode_features/__init__.py,sha256=H4M7B3fzFk-FEgIbfZMdhNP9w4yMQZrAy7ZG0z3sHMs,2727
+zipcode_features-0.0.5.dist-info/licenses/LICENSE,sha256=HDbMJ7oItmxTn3jVtZFi6jUFAHovset5jzAPUderjOc,1073
+zipcode_features-0.0.5.dist-info/METADATA,sha256=beUdSoQfPbegVeQFypOIpsKhHqTTSDCipCK0JdOdTvM,6238
+zipcode_features-0.0.5.dist-info/WHEEL,sha256=aeYiig01lYGDzBgS8HxWXOg3uV61G9ijOsup-k9o1sk,91
+zipcode_features-0.0.5.dist-info/top_level.txt,sha256=ijGAxdXHaO43tVlCj3Kn05dj-hkXAv1pXBEq1Yj8mt0,17
+zipcode_features-0.0.5.dist-info/RECORD,,

{zipcode_features-0.0.2.dist-info → zipcode_features-0.0.5.dist-info}/WHEEL RENAMED Viewed

@@ -1,5 +1,5 @@
 Wheel-Version: 1.0
-Generator: setuptools (80.9.0)
+Generator: setuptools (82.0.1)
 Root-Is-Purelib: true
 Tag: py3-none-any

zipcode_features-0.0.2.dist-info/METADATA DELETED Viewed

@@ -1,34 +0,0 @@
-Metadata-Version: 2.4
-Name: zipcode_features
-Version: 0.0.2
-Summary: A tool to get features based on census data from zipcodes
-Home-page: https://github.com/EricSchles/zipcode_features
-Author: Eric Schles
-Author-email: ericschles@gmail.com
-License: MIT
-Classifier: License :: OSI Approved :: MIT License
-Classifier: Programming Language :: Python :: 3
-Classifier: Programming Language :: Python :: 3.6
-Classifier: Programming Language :: Python :: 3.7
-Classifier: Programming Language :: Python :: 3.8
-Classifier: Programming Language :: Python :: 3.9
-Classifier: Programming Language :: Python :: 3.10
-Classifier: Programming Language :: Python :: 3.11
-Description-Content-Type: text/markdown
-License-File: LICENSE
-Requires-Dist: zipcodes
-Requires-Dist: pandas
-Dynamic: author
-Dynamic: author-email
-Dynamic: classifier
-Dynamic: description
-Dynamic: description-content-type
-Dynamic: home-page
-Dynamic: license
-Dynamic: license-file
-Dynamic: requires-dist
-Dynamic: summary
-# zipcode features
-similar to [uszipcode-project](https://github.com/EricSchles/uszipcode-project)

zipcode_features-0.0.2.dist-info/RECORD DELETED Viewed

@@ -1,6 +0,0 @@
-zipcode_features/__init__.py,sha256=TCSZkr4RP81Nl3lGTfoRMwJa_ASA4pOGfpOPwd-eRZE,1475
-zipcode_features-0.0.2.dist-info/licenses/LICENSE,sha256=HDbMJ7oItmxTn3jVtZFi6jUFAHovset5jzAPUderjOc,1073
-zipcode_features-0.0.2.dist-info/METADATA,sha256=RK72-UXOg_G_v44_BOwALk8TZSVpsGtGlLi-KtI4BZw,1074
-zipcode_features-0.0.2.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
-zipcode_features-0.0.2.dist-info/top_level.txt,sha256=ijGAxdXHaO43tVlCj3Kn05dj-hkXAv1pXBEq1Yj8mt0,17
-zipcode_features-0.0.2.dist-info/RECORD,,

{zipcode_features-0.0.2.dist-info → zipcode_features-0.0.5.dist-info}/licenses/LICENSE RENAMED Viewed

File without changes

{zipcode_features-0.0.2.dist-info → zipcode_features-0.0.5.dist-info}/top_level.txt RENAMED Viewed

File without changes

zipcode-features 0.0.2__py3-none-any.whl → 0.0.5__py3-none-any.whl

zipcode-features 0.0.2py3-none-any.whl → 0.0.5py3-none-any.whl