zipcode-features 0.0.2__py3-none-any.whl → 0.0.5__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,9 +1,44 @@
1
- __version__ = '0.0.2'
1
+ __version__ = '0.0.5'
2
2
 
3
3
  import zipcodes
4
- from uszipcode.search import SearchEngine
4
+ from zipcode3.search import SearchEngine
5
5
  import pandas as pd
6
+ import json
6
7
 
8
def zipcode_mapper(x):
    """
    This method left-pads a zip code with zeros so it is five characters long.

    Parameters:
        x: a row-like object (for example a pandas Series or a dict) whose
           "ZIP" entry is a string. A "ZIP_len" entry may still be present
           for backward compatibility, but it is no longer read.

    Returns:
        The zip code as a string, zero-padded on the left to five characters.
        Values that are already five characters or longer come back unchanged.
    """
    # zfill gives the same result as the old 3- and 4-character branches and
    # also pads shorter values, which the old branches returned untouched.
    return x["ZIP"].zfill(5)
15
+
16
def _get_zip_to_cbsa_code() -> dict:
    """
    This method gets a mapping from zipcode to cbsa code
    mapping is of the form:
    {"zip code": "cbsa code"}

    The data is read from "CBSA_ZIP_122025.csv"; because the path is relative,
    it is resolved against the current working directory. Both columns are
    read as strings and zip codes are left-padded with zeros to five
    characters. If a zip code appears on more than one row, the last row wins.

    Raises:
        FileNotFoundError: if the CSV file is not in the working directory.
    """
    df = pd.read_csv("CBSA_ZIP_122025.csv", dtype={"ZIP": str, "CBSA": str})
    # Pad short codes back to five characters in a single vectorised pass.
    padded_zips = df["ZIP"].str.zfill(5)
    # DataFrame.to_dict() would give {column: {row index: value}}, which cannot
    # be used to look up a cbsa code by zip code; pair the two columns instead.
    return dict(zip(padded_zips, df["CBSA"]))
27
+
28
def _get_cbsa_code_to_cbsa_name() -> dict:
    """
    This method gets a mapping from cbsa code to name
    {cbsa code: cbsa name}

    The data is read from "cbsa_codes.json"; because the path is relative, it
    is resolved against the current working directory. Each name is cleaned
    by removing the space before a hyphen (" -" becomes "-") and collapsing
    every run of whitespace into a single space.

    Raises:
        FileNotFoundError: if the JSON file is not in the working directory.
    """
    # The context manager closes the file handle, which a bare open() leaked.
    with open("cbsa_codes.json", encoding="utf-8") as f:
        code_to_name = json.load(f)
    # Return a flat {code: name} dict; the previous DataFrame.to_dict() result
    # was keyed by column name and row index, so lookups by code never matched.
    return {
        code: " ".join(name.replace(" -", "-").split())
        for code, name in code_to_name.items()
    }
40
+
41
+
7
42
  def us_get_demographics(state: str, city: str = None, zip_list: list = None) -> pd.DataFrame:
8
43
  """
9
44
  This gets demographic information associated with zipcodes in the United States of America.
@@ -43,4 +78,10 @@ def us_get_demographics(state: str, city: str = None, zip_list: list = None) ->
43
78
  tmp_dict = zipcode_and_demo[index][1].to_dict()
44
79
  tmp_dict["zip_code"] = zipcode_and_demo[index][0]
45
80
  demographics.append(tmp_dict)
46
- return pd.DataFrame(demographics)
81
+ df = pd.DataFrame(demographics)
82
+ zip_to_cbsa = _get_zip_to_cbsa_code()
83
+ df["cbsa"] = df["zip_code"].map(zip_to_cbsa)
84
+ cbsa_code_to_name = _get_cbsa_code_to_cbsa_name()
85
+ df["cbsa_name"] = df["cbsa"].map(cbsa_code_to_name)
86
+ return df
87
+
@@ -0,0 +1,203 @@
1
+ Metadata-Version: 2.4
2
+ Name: zipcode_features
3
+ Version: 0.0.5
4
+ Summary: A tool to get features based on census data from zipcodes
5
+ Home-page: https://github.com/EricSchles/zipcode_features
6
+ Author: Eric Schles
7
+ Author-email: ericschles@gmail.com
8
+ License: MIT
9
+ Classifier: License :: OSI Approved :: MIT License
10
+ Classifier: Programming Language :: Python :: 3
11
+ Classifier: Programming Language :: Python :: 3.6
12
+ Classifier: Programming Language :: Python :: 3.7
13
+ Classifier: Programming Language :: Python :: 3.8
14
+ Classifier: Programming Language :: Python :: 3.9
15
+ Classifier: Programming Language :: Python :: 3.10
16
+ Classifier: Programming Language :: Python :: 3.11
17
+ Description-Content-Type: text/markdown
18
+ License-File: LICENSE
19
+ Requires-Dist: zipcodes
20
+ Requires-Dist: pandas
21
+ Requires-Dist: zipcode3
22
+ Dynamic: author
23
+ Dynamic: author-email
24
+ Dynamic: classifier
25
+ Dynamic: description
26
+ Dynamic: description-content-type
27
+ Dynamic: home-page
28
+ Dynamic: license
29
+ Dynamic: license-file
30
+ Dynamic: requires-dist
31
+ Dynamic: summary
32
+
33
+ # zipcode features
34
+
35
+ similar to [uszipcode-project](https://github.com/EricSchles/uszipcode-project)
36
+
37
+ ## Getting CBSA mapping
38
+
39
+ If you need CBSA data you can append it to the dataframe with the following example:
40
+
41
+ ```python
42
+ from zipcode_features import us_get_demographics
43
+ import pandas as pd
44
+
45
+ def _get_cbsa_data():
46
+ return pd.read_excel(
47
+ "https://github.com/EricSchles/zipcode_features/raw/refs/heads/main/zipcode_features/CBSA_ZIP_122025.xlsx",
48
+ sheet_name='Export Worksheet'
49
+ )[["CBSA", "ZIP"]]
50
+
51
+ demo = us_get_demographics(state="NY")
52
+ cbsa_zip_map = _get_cbsa_data()
53
+ df = pd.merge(demo, cbsa_zip_map, how="left", left_on="zipcode", right_on="ZIP")
54
+ ```
55
+
56
+ You can get the semantic names [here](https://www2.census.gov/programs-surveys/cps/methodology/2015%20Geography%20Cover.pdf).
57
+
58
+ Here's a python script to parse them:
59
+
60
+ ```python
61
+ import urllib.request
62
+ import PyPDF2
63
+ import json
64
+ import re
65
+ import io
66
+
67
+ def fetch_cbsa_to_json():
68
+ url = "https://www2.census.gov/programs-surveys/cps/methodology/2015%20Geography%20Cover.pdf"
69
+
70
+ print("Downloading Census PDF...")
71
+ # Using a User-Agent to ensure the request isn't blocked by the server
72
+ req = urllib.request.Request(url, headers={'User-Agent': 'Mozilla/5.0'})
73
+
74
+ try:
75
+ response = urllib.request.urlopen(req)
76
+ pdf_bytes = io.BytesIO(response.read())
77
+ except Exception as e:
78
+ print(f"Failed to download PDF: {e}")
79
+ return
80
+
81
+ print("Parsing PDF...")
82
+ reader = PyPDF2.PdfReader(pdf_bytes)
83
+
84
+ cbsa_mapping = {}
85
+
86
+ # Regular expression to match a 5-digit FIPS/CBSA code followed by the area name
87
+ # Example match: "11460 Ann Arbor, MI"
88
+ pattern = re.compile(r'\b(\d{5})\s+(.+?)(?=\s+\d{5}|\n|$)')
89
+
90
+ for page in reader.pages:
91
+ text = page.extract_text()
92
+ if text:
93
+ matches = pattern.findall(text)
94
+ for code, name in matches:
95
+ # Clean up any trailing spaces or artifacts
96
+ clean_name = name.strip()
97
+ # Exclude standalone numbers or random headers that might get caught
98
+ if len(clean_name) > 2 and not clean_name.isdigit():
99
+ cbsa_mapping[code] = clean_name
100
+
101
+ print(f"Extracted {len(cbsa_mapping)} CBSA codes.")
102
+
103
+ # Save the mapping to a JSON file
104
+ output_file = 'cbsa_codes.json'
105
+ with open(output_file, 'w', encoding='utf-8') as f:
106
+ json.dump(cbsa_mapping, f, indent=4)
107
+
108
+ print(f"Successfully saved to {output_file}")
109
+
110
+ if __name__ == "__main__":
111
+ fetch_cbsa_to_json()
112
+ ```
113
+
114
+ Here's a working example for using this with the above:
115
+
116
+ ```python
117
+ import requests
118
+ from zipcode_features import us_get_demographics
119
+ import pandas as pd
120
+
121
+ def _get_cbsa_data():
122
+ return pd.read_excel(
123
+ "https://github.com/EricSchles/zipcode_features/raw/refs/heads/main/zipcode_features/CBSA_ZIP_122025.xlsx",
124
+ sheet_name='Export Worksheet'
125
+ )[["CBSA", "ZIP"]]
126
+
127
+ demo = us_get_demographics(state="NY")
128
+ cbsa_zip_map = _get_cbsa_data()
129
+ df = pd.merge(demo, cbsa_zip_map, how="left", left_on="zipcode", right_on="ZIP")
130
+ df = df.drop("ZIP", axis=1)
131
+ mapping = requests.get("https://raw.githubusercontent.com/EricSchles/zipcode_features/refs/heads/main/zipcode_features/cbsa_codes.json").json()
132
+ df["cbsa_name"] = df["CBSA"].map(mapping)
133
+ df = df.drop("CBSA", axis=1)
134
+ ```
135
+
136
+ ## Adding County
137
+
138
+
139
+ ```python
140
+ from zipcode_features import us_get_demographics
141
+ import pandas as pd
142
+
143
+ def _get_fips_data():
144
+ df = pd.read_excel(
145
+ "https://github.com/EricSchles/zipcode_features/raw/refs/heads/main/zipcode_features/ZIP_COUNTY_122025.xlsx",
146
+ dtype={'ZIP': 'str'},
147
+ sheet_name='Export Worksheet'
148
+ )[["COUNTY", "ZIP"]]
149
+ df["COUNTY"] = df['COUNTY'].astype(str)
150
+ return df.dropna()
151
+
152
+ demo = us_get_demographics(state="NY")
153
+ fips_zip_map = _get_fips_data()
154
+ df = pd.merge(demo, fips_zip_map, how="left", left_on="zipcode", right_on="ZIP")
155
+ df = df.drop("ZIP", axis=1)
156
+ df = df.dropna()
157
+ ```
158
+
159
+ ## Adding Regional Prices
160
+
161
+ ```bash
162
+ python -m pip install beaapi us
163
+ ```
164
+
165
+ ```python
166
+ from zipcode_features import us_get_demographics
167
+ import pandas as pd
168
+ import beaapi
169
+ import us
170
+
171
+ df = us_get_demographics(state="NY")
172
+
173
+ # get your key here: https://apps.bea.gov/API/signup/
174
+ beakey = ""
175
+
176
+ dataset="Regional"
177
+ table = "SARPP"
178
+ regional_cpi = beaapi.get_data(
179
+ userid=beakey,
180
+ method='GetData',
181
+ datasetname=dataset, # the BEA "Regional" dataset
182
+ tablename=table, # the "SARPP" table (regional price parities)
183
+ GeoFips="STATE",
184
+ LineCode="1",
185
+ ResultFormat="json"
186
+ #Frequency='A', # Annual data
187
+ )[["GeoName", "DataValue"]]
188
+ regional_cpi = regional_cpi[regional_cpi["GeoName"] != "United States"]
189
+ regional_cpi["year"] = ["2020", "2021", "2022", "2023", "2024"] * 51
190
+ abbreviations_map = us.states.mapping('name', 'abbr')
191
+ regional_cpi["state"] = regional_cpi["GeoName"].map(abbreviations_map)
192
+ regional_cpi["cpi"] = regional_cpi["DataValue"]
193
+ regional_cpi = regional_cpi.drop("DataValue", axis=1)
194
+ regional_cpi = regional_cpi[regional_cpi["year"] == "2024"]
195
+ regional_cpi["cpi_year"] = regional_cpi["year"]
196
+ regional_cpi = regional_cpi.drop("year", axis=1)
197
+ df = pd.merge(df, regional_cpi, how='left', on="state")
198
+ df["regional_cpi"] = df["cpi"]
199
+ df = df.drop("cpi", axis=1)
200
+ ```
201
+
202
+
203
+
@@ -0,0 +1,6 @@
1
+ zipcode_features/__init__.py,sha256=H4M7B3fzFk-FEgIbfZMdhNP9w4yMQZrAy7ZG0z3sHMs,2727
2
+ zipcode_features-0.0.5.dist-info/licenses/LICENSE,sha256=HDbMJ7oItmxTn3jVtZFi6jUFAHovset5jzAPUderjOc,1073
3
+ zipcode_features-0.0.5.dist-info/METADATA,sha256=beUdSoQfPbegVeQFypOIpsKhHqTTSDCipCK0JdOdTvM,6238
4
+ zipcode_features-0.0.5.dist-info/WHEEL,sha256=aeYiig01lYGDzBgS8HxWXOg3uV61G9ijOsup-k9o1sk,91
5
+ zipcode_features-0.0.5.dist-info/top_level.txt,sha256=ijGAxdXHaO43tVlCj3Kn05dj-hkXAv1pXBEq1Yj8mt0,17
6
+ zipcode_features-0.0.5.dist-info/RECORD,,
@@ -1,5 +1,5 @@
1
1
  Wheel-Version: 1.0
2
- Generator: setuptools (80.9.0)
2
+ Generator: setuptools (82.0.1)
3
3
  Root-Is-Purelib: true
4
4
  Tag: py3-none-any
5
5
 
@@ -1,34 +0,0 @@
1
- Metadata-Version: 2.4
2
- Name: zipcode_features
3
- Version: 0.0.2
4
- Summary: A tool to get features based on census data from zipcodes
5
- Home-page: https://github.com/EricSchles/zipcode_features
6
- Author: Eric Schles
7
- Author-email: ericschles@gmail.com
8
- License: MIT
9
- Classifier: License :: OSI Approved :: MIT License
10
- Classifier: Programming Language :: Python :: 3
11
- Classifier: Programming Language :: Python :: 3.6
12
- Classifier: Programming Language :: Python :: 3.7
13
- Classifier: Programming Language :: Python :: 3.8
14
- Classifier: Programming Language :: Python :: 3.9
15
- Classifier: Programming Language :: Python :: 3.10
16
- Classifier: Programming Language :: Python :: 3.11
17
- Description-Content-Type: text/markdown
18
- License-File: LICENSE
19
- Requires-Dist: zipcodes
20
- Requires-Dist: pandas
21
- Dynamic: author
22
- Dynamic: author-email
23
- Dynamic: classifier
24
- Dynamic: description
25
- Dynamic: description-content-type
26
- Dynamic: home-page
27
- Dynamic: license
28
- Dynamic: license-file
29
- Dynamic: requires-dist
30
- Dynamic: summary
31
-
32
- # zipcode features
33
-
34
- similar to [uszipcode-project](https://github.com/EricSchles/uszipcode-project)
@@ -1,6 +0,0 @@
1
- zipcode_features/__init__.py,sha256=TCSZkr4RP81Nl3lGTfoRMwJa_ASA4pOGfpOPwd-eRZE,1475
2
- zipcode_features-0.0.2.dist-info/licenses/LICENSE,sha256=HDbMJ7oItmxTn3jVtZFi6jUFAHovset5jzAPUderjOc,1073
3
- zipcode_features-0.0.2.dist-info/METADATA,sha256=RK72-UXOg_G_v44_BOwALk8TZSVpsGtGlLi-KtI4BZw,1074
4
- zipcode_features-0.0.2.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
5
- zipcode_features-0.0.2.dist-info/top_level.txt,sha256=ijGAxdXHaO43tVlCj3Kn05dj-hkXAv1pXBEq1Yj8mt0,17
6
- zipcode_features-0.0.2.dist-info/RECORD,,